This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 22307f4125 [GH-2269] Geopandas: Fix geodataframe.copy() to properly create a copy (#2270)
22307f4125 is described below

commit 22307f4125abb46a78f0a30ba3fc3045aa21e91e
Author: Peter Nguyen <[email protected]>
AuthorDate: Mon Aug 11 22:58:08 2025 -0700

    [GH-2269] Geopandas: Fix geodataframe.copy() to properly create a copy (#2270)
---
 python/sedona/spark/geopandas/geodataframe.py | 14 +++++-----
 python/tests/geopandas/test_geodataframe.py   | 39 +++++++++++++++------------
 2 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/python/sedona/spark/geopandas/geodataframe.py b/python/sedona/spark/geopandas/geodataframe.py
index 2470f310a0..6e379dca69 100644
--- a/python/sedona/spark/geopandas/geodataframe.py
+++ b/python/sedona/spark/geopandas/geodataframe.py
@@ -574,7 +574,7 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
         if inplace:
             frame = self
         else:
-            frame = self.copy(deep=False)
+            frame = self.copy()
 
         geo_column_name = self._geometry_column_name
         new_series = False
@@ -785,7 +785,7 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
         """
         return pspd.DataFrame(self._internal)
 
-    def copy(self, deep=False):
+    def copy(self, deep=False) -> GeoDataFrame:
         """
         Make a copy of this GeoDataFrame object.
 
@@ -810,12 +810,10 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
            geometry  value1  value2
         0  POINT (1 1)       2       3
         """
-        if deep:
-            return GeoDataFrame(
-                self._anchor.copy(), dtype=self.dtypes, index=self._col_label
-            )
-        else:
-            return self  # GeoDataFrame(self._internal.spark_frame.copy())  
"this parameter is not supported but just dummy parameter to match pandas."
+        # Note: The deep parameter is a dummy parameter just as it is in PySpark pandas
+        return GeoDataFrame(
+            pspd.DataFrame(self._internal.copy()), 
geometry=self.active_geometry_name
+        )
 
     def _safe_get_crs(self):
         """
diff --git a/python/tests/geopandas/test_geodataframe.py b/python/tests/geopandas/test_geodataframe.py
index 47b5eb388b..5780a9e36c 100644
--- a/python/tests/geopandas/test_geodataframe.py
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -277,9 +277,12 @@ class TestGeoDataFrame(TestGeopandasBase):
         )
 
     def test_copy(self):
-        df = GeoDataFrame([Point(x, x) for x in range(3)], name="test_df")
-        df_copy = df.copy()
-        assert type(df_copy) is GeoDataFrame
+        df = GeoDataFrame(
+            {"test": [Point(x, x) for x in range(3)]}, index=[1, 2, 3], 
geometry="test"
+        )
+        result = df.copy()
+        self.check_sgpd_df_equals_gpd_df(result, df.to_geopandas())
+        self.check_sgpd_df_equals_gpd_df(df, result.to_geopandas())
 
     def test_set_crs(self):
         sgpd_df = sgpd.GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1)]})
@@ -297,9 +300,12 @@ class TestGeoDataFrame(TestGeopandasBase):
         assert sgpd_df.crs is None
 
         with self.ps_allow_diff_frames():
-            sgpd_df = sgpd_df.set_crs(4326, allow_override=True)
-        assert isinstance(sgpd_df, GeoDataFrame)
-        assert sgpd_df.crs.to_epsg() == 4326
+            result = sgpd_df.set_crs(4326, allow_override=True)
+        assert result.crs.to_epsg() == 4326
+        assert isinstance(result, GeoDataFrame)
+
+        # Ensure set_crs without inplace modifies a copy and not current df
+        assert sgpd_df.crs is None
 
     def test_to_crs(self):
         from pyproj import CRS
@@ -313,6 +319,8 @@ class TestGeoDataFrame(TestGeopandasBase):
         with self.ps_allow_diff_frames():
             result = gdf.to_crs(3857)
         assert isinstance(result.crs, CRS) and result.crs.to_epsg() == 3857
+        # Ensure original df is not modified
+        assert gdf.crs.to_epsg() == 4326
 
         expected = gpd.GeoSeries(
             [
@@ -338,21 +346,18 @@ class TestGeoDataFrame(TestGeopandasBase):
         with pytest.raises(MissingGeometryColumnError):
             _ = sgpd_df.geometry
 
-        # TODO: Try to optimize this with self.ps_allow_diff_frames() away
-        with self.ps_allow_diff_frames():
-            sgpd_df = sgpd_df.set_geometry("geometry1")
+        sgpd_df.set_geometry("geometry1", inplace=True)
 
         assert sgpd_df.geometry.name == "geometry1"
 
-        # TODO: Try to optimize this with self.ps_allow_diff_frames() away
-        with self.ps_allow_diff_frames():
-            sgpd_df.set_geometry("geometry2", inplace=True)
-        assert sgpd_df.geometry.name == "geometry2"
+        result = sgpd_df.set_geometry("geometry2")
+        assert result.geometry.name == "geometry2"
 
-        # Test the actual values of the geometry column
-        assert_series_equal(
-            sgpd_df.geometry.area.to_pandas(), 
sgpd_df["geometry2"].area.to_pandas()
-        )
+        # Ensure original df is not modified
+        assert sgpd_df.geometry.name == "geometry1"
+
+        # Test the actual values of the geometry column equal for an area calculation
+        self.check_pd_series_equal(result.area, 
sgpd_df["geometry2"].area.to_pandas())
 
         # unknown column
         with pytest.raises(ValueError):

Reply via email to