This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 406e35212e [GH-2050] Geopandas.GeoSeries: Implement align=False for 
_row_wise_operation + support indexes (#2057)
406e35212e is described below

commit 406e35212ea903f59aab2271b49e2808bc966898
Author: Peter Nguyen <[email protected]>
AuthorDate: Thu Jul 3 21:48:44 2025 -0700

    [GH-2050] Geopandas.GeoSeries: Implement align=False for 
_row_wise_operation + support indexes (#2057)
---
 python/sedona/geopandas/geoseries.py               | 138 +++++++++++++--------
 python/tests/geopandas/test_geoseries.py           |  68 +++++++++-
 .../tests/geopandas/test_match_geopandas_series.py |  30 ++++-
 3 files changed, 183 insertions(+), 53 deletions(-)

diff --git a/python/sedona/geopandas/geoseries.py 
b/python/sedona/geopandas/geoseries.py
index b3bbdb418a..805b827fb8 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -39,7 +39,10 @@ from sedona.geopandas.base import GeoFrame
 from sedona.geopandas.geodataframe import GeoDataFrame
 from sedona.geopandas.geoindex import GeoIndex
 
-from pyspark.pandas.internal import SPARK_DEFAULT_INDEX_NAME  # 
__index_level_0__
+from pyspark.pandas.internal import (
+    SPARK_DEFAULT_INDEX_NAME,  # __index_level_0__
+    NATURAL_ORDER_COLUMN_NAME,
+)
 
 
 class GeoSeries(GeoFrame, pspd.Series):
@@ -169,14 +172,7 @@ class GeoSeries(GeoFrame, pspd.Series):
                 gs.apply(lambda geom: geom.wkb if geom is not None else None)
             )
             # initialize the parent class pyspark Series with the pandas Series
-            super().__init__(
-                data=pdf,
-                index=index,
-                dtype=dtype,
-                name=name,
-                copy=copy,
-                fastpath=fastpath,
-            )
+            super().__init__(data=pdf)
 
         if crs:
             self.set_crs(crs, inplace=True)
@@ -950,7 +946,7 @@ class GeoSeries(GeoFrame, pspd.Series):
         An object is said to intersect `other` if its `boundary` and `interior`
         intersects in any way with those of the other.
 
-        The operation works on a 1-to-1 row-wise manner:
+        The operation works on a 1-to-1 row-wise manner.
 
         Parameters
         ----------
@@ -959,7 +955,7 @@ class GeoSeries(GeoFrame, pspd.Series):
             intersected.
         align : bool | None (default None)
             If True, automatically aligns GeoSeries based on their indices. None defaults to True.
-            If False, the order of elements is preserved. (not supported in 
Sedona Geopandas)
+            If False, the order of elements is preserved.
 
         Returns
         -------
@@ -982,23 +978,26 @@ class GeoSeries(GeoFrame, pspd.Series):
         ...         LineString([(1, 0), (1, 3)]),
         ...         LineString([(2, 0), (0, 2)]),
         ...         Point(1, 1),
-        ...         Point(-100, -100),
+        ...         Point(0, 1),
         ...     ],
         ...     index=range(1, 5),
         ... )
 
-        We can check two GeoSeries against each other, row by row.
-        The GeoSeries above have different indices. We align both GeoSeries
-        based on index values and compare elements with the same index:
+        >>> s
+        0    POLYGON ((0 0, 2 2, 0 2, 0 0))
+        1             LINESTRING (0 0, 2 2)
+        2             LINESTRING (2 0, 0 2)
+        3                       POINT (0 1)
+        dtype: geometry
 
-        >>> s.intersects(s2)
-        0     True
-        1     True
-        2     True
-        3    False
-        dtype: bool
+        >>> s2
+        1    LINESTRING (1 0, 1 3)
+        2    LINESTRING (2 0, 0 2)
+        3              POINT (1 1)
+        4              POINT (0 1)
+        dtype: geometry
 
-        We can also check if each geometry of GeoSeries intersects a single
+        We can check if each geometry of GeoSeries intersects a single
         geometry:
 
         >>> line = LineString([(-1, 1), (3, 1)])
@@ -1009,6 +1008,27 @@ class GeoSeries(GeoFrame, pspd.Series):
         3    True
         dtype: bool
 
+        We can also check two GeoSeries against each other, row by row.
+        The GeoSeries above have different indices. We can either align both GeoSeries
+        based on index values and compare elements with the same index using
+        ``align=True`` or ignore index and compare elements based on their matching
+        order using ``align=False``:
+
+        >>> s.intersects(s2, align=True)
+        0    False
+        1     True
+        2     True
+        3    False
+        4    False
+        dtype: bool
+
+        >>> s.intersects(s2, align=False)
+        0    True
+        1    True
+        2    True
+        3    True
+        dtype: bool
+
         Notes
         -----
         This method works in a row-wise manner. It does not check if an element
@@ -1035,7 +1055,7 @@ class GeoSeries(GeoFrame, pspd.Series):
         """Returns a ``GeoSeries`` of the intersection of points in each
         aligned geometry with `other`.
 
-        The operation works on a 1-to-1 row-wise manner:
+        The operation works on a 1-to-1 row-wise manner.
 
         Parameters
         ----------
@@ -1044,7 +1064,7 @@ class GeoSeries(GeoFrame, pspd.Series):
             intersection with.
         align : bool | None (default None)
             If True, automatically aligns GeoSeries based on their indices. None defaults to True.
-            If False, the order of elements is preserved. (not supported in 
Sedona Geopandas)
+            If False, the order of elements is preserved.
 
         Returns
         -------
@@ -1069,43 +1089,62 @@ class GeoSeries(GeoFrame, pspd.Series):
         ...         LineString([(1, 0), (1, 3)]),
         ...         LineString([(2, 0), (0, 2)]),
         ...         Point(1, 1),
-        ...         Point(-100, -100),
+        ...         Point(0, 1),
         ...     ],
+        ...     index=range(1, 6),
         ... )
 
-        We can do an intersection of each geometry and a single
-        shapely geometry:
+        >>> s
+        0    POLYGON ((0 0, 2 2, 0 2, 0 0))
+        1    POLYGON ((0 0, 2 2, 0 2, 0 0))
+        2             LINESTRING (0 0, 2 2)
+        3             LINESTRING (2 0, 0 2)
+        4                       POINT (0 1)
+        dtype: geometry
 
-        >>> geom = Polygon([(-0.5, -0.5), (-0.5, 2.5), (2.5, 2.5), (2.5, 
-0.5), (-0.5, -0.5)])
-        >>> s.intersection(geom)
-            Polygon([(0, 0), (2, 2), (0, 2)]),
-            Polygon([(0, 0), (2, 2), (0, 2)]),
-            LineString([(0, 0), (2, 2)]),
-            LineString([(2, 0), (0, 2)]),
-            Point(0, 1),
+        >>> s2
+        1    POLYGON ((0 0, 1 1, 0 1, 0 0))
+        2             LINESTRING (1 0, 1 3)
+        3             LINESTRING (2 0, 0 2)
+        4                       POINT (1 1)
+        5                       POINT (0 1)
         dtype: geometry
 
-        >>> geom = Polygon([(-0.5, -0.5), (-0.5, 2.5), (2.5, 2.5), (2.5, 
-0.5), (-0.5, -0.5)])
+        We can also do intersection of each geometry and a single
+        shapely geometry:
+
         >>> s.intersection(Polygon([(0, 0), (1, 1), (0, 1)]))
-        0         POLYGON ((0 0, 2 2, 0 2))
-        1         POLYGON ((0 0, 2 2, 0 2))
-        2             LINESTRING (0 0, 2 2)
-        3             LINESTRING (2 0, 0 2)
+        0    POLYGON ((0 0, 0 1, 1 1, 0 0))
+        1    POLYGON ((0 0, 0 1, 1 1, 0 0))
+        2             LINESTRING (0 0, 1 1)
+        3                       POINT (1 1)
         4                       POINT (0 1)
         dtype: geometry
 
         We can also check two GeoSeries against each other, row by row.
-        The GeoSeries above have different indices. We align both GeoSeries
-        based on index values and compare elements with the same index.
+        The GeoSeries above have different indices. We can either align both GeoSeries
+        based on index values and compare elements with the same index using
+        ``align=True`` or ignore index and compare elements based on their matching
+        order using ``align=False``:
+
+        >>> s.intersection(s2, align=True)
+        0                              None
+        1    POLYGON ((0 0, 0 1, 1 1, 0 0))
+        2                       POINT (1 1)
+        3             LINESTRING (2 0, 0 2)
+        4                       POINT EMPTY
+        5                              None
+        dtype: geometry
 
-        >>> s.intersection(s2)
-        0    POLYGON ((0 0, 1 1, 0 1, 0 0))
+        >>> s.intersection(s2, align=False)
+        0    POLYGON ((0 0, 0 1, 1 1, 0 0))
         1             LINESTRING (1 1, 1 2)
         2                       POINT (1 1)
         3                       POINT (1 1)
-        4                     POLYGON EMPTY
+        4                       POINT (0 1)
         dtype: geometry
 
+
         See Also
         --------
         GeoSeries.difference
@@ -1130,8 +1169,9 @@ class GeoSeries(GeoFrame, pspd.Series):
         from pyspark.sql.functions import col
 
         # Note: this is specifically False. None is valid since it defaults to True, similar to geopandas
-        if align is False:
-            raise NotImplementedError("Sedona Geopandas does not support 
align=False")
+        index_col = (
+            NATURAL_ORDER_COLUMN_NAME if align is False else 
SPARK_DEFAULT_INDEX_NAME
+        )
 
         if isinstance(other, BaseGeometry):
             other = GeoSeries([other] * len(self))
@@ -1141,13 +1181,13 @@ class GeoSeries(GeoFrame, pspd.Series):
         # TODO: this does not yet support multi-index
         df = self._internal.spark_frame.select(
             col(self.get_first_geometry_column()).alias("L"),
-            col(SPARK_DEFAULT_INDEX_NAME),
+            col(index_col),
         )
         other_df = other._internal.spark_frame.select(
             col(other.get_first_geometry_column()).alias("R"),
-            col(SPARK_DEFAULT_INDEX_NAME),
+            col(index_col),
         )
-        joined_df = df.join(other_df, on=SPARK_DEFAULT_INDEX_NAME, how="outer")
+        joined_df = df.join(other_df, on=index_col, how="outer")
         return self._query_geometry_column(
             select,
             cols=["L", "R"],
diff --git a/python/tests/geopandas/test_geoseries.py 
b/python/tests/geopandas/test_geoseries.py
index 21b84ca7e5..6aabd495dc 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -59,7 +59,10 @@ class TestGeoSeries(TestBase):
         assert len(actual) == len(expected)
         sgpd_result = actual.to_geopandas()
         for a, e in zip(sgpd_result, expected):
-            if a.is_empty and e.is_empty:
+            if a is None or e is None:
+                assert a is None and e is None
+                continue
+            elif a.is_empty and e.is_empty:
                 continue
             self.assert_geometry_almost_equal(a, e)
 
@@ -466,6 +469,23 @@ class TestGeoSeries(TestBase):
         expected = pd.Series([True, True, True, True])
         assert_series_equal(result.to_pandas(), expected)
 
+        # from the original doc string
+        s2 = sgpd.GeoSeries(
+            [
+                LineString([(1, 0), (1, 3)]),
+                LineString([(2, 0), (0, 2)]),
+                Point(1, 1),
+                Point(0, 1),
+            ],
+            index=range(1, 5),
+        )
+
+        result = s.intersects(s2, align=True)
+        expected = pd.Series([False, True, True, False, False])
+
+        result = s.intersects(s2, align=False)
+        expected = pd.Series([True, True, True, True])
+
     def test_intersection(self):
         s = sgpd.GeoSeries(
             [
@@ -513,8 +533,50 @@ class TestGeoSeries(TestBase):
         )
         self.check_sgpd_equals_gpd(result, expected)
 
-        with pytest.raises(NotImplementedError):
-            s.intersection(s2, align=False)
+        # from the original doc string
+        s = sgpd.GeoSeries(
+            [
+                Polygon([(0, 0), (2, 2), (0, 2)]),
+                Polygon([(0, 0), (2, 2), (0, 2)]),
+                LineString([(0, 0), (2, 2)]),
+                LineString([(2, 0), (0, 2)]),
+                Point(0, 1),
+            ],
+        )
+        s2 = sgpd.GeoSeries(
+            [
+                Polygon([(0, 0), (1, 1), (0, 1)]),
+                LineString([(1, 0), (1, 3)]),
+                LineString([(2, 0), (0, 2)]),
+                Point(1, 1),
+                Point(0, 1),
+            ],
+            index=range(1, 6),
+        )
+        result = s.intersection(s2, align=True)
+        expected = gpd.GeoSeries(
+            [
+                None,
+                Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
+                Point(1, 1),
+                LineString([(2, 0), (0, 2)]),
+                Point(),
+                None,
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        result = s.intersection(s2, align=False)
+        expected = gpd.GeoSeries(
+            [
+                Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
+                LineString([(1, 1), (1, 2)]),
+                Point(1, 1),
+                Point(1, 1),
+                Point(0, 1),
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
 
     def test_intersection_all(self):
         pass
diff --git a/python/tests/geopandas/test_match_geopandas_series.py 
b/python/tests/geopandas/test_match_geopandas_series.py
index 1990b87074..289da18401 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -496,6 +496,15 @@ class TestMatchGeopandasSeries(TestBase):
                 gpd_result = 
gpd.GeoSeries(geom).intersects(gpd.GeoSeries(geom2))
                 self.check_pd_series_equal(sgpd_result, gpd_result)
 
+                if len(geom) == len(geom2):
+                    sgpd_result = GeoSeries(geom).intersects(
+                        GeoSeries(geom2), align=False
+                    )
+                    gpd_result = gpd.GeoSeries(geom).intersects(
+                        gpd.GeoSeries(geom2), align=False
+                    )
+                    self.check_pd_series_equal(sgpd_result, gpd_result)
+
     def test_intersection(self):
         geometries = [
             Polygon([(0, 0), (1, 0), (1, 1)]),
@@ -511,6 +520,22 @@ class TestMatchGeopandasSeries(TestBase):
                 gpd_result = gpd.GeoSeries(g1).intersection(gpd.GeoSeries(g2))
                 self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
 
+        # Ensure both align True and False work correctly
+        for _, g1 in self.geoms:
+            for _, g2 in self.geoms:
+                gpd_series1, gpd_series2 = gpd.GeoSeries(g1), gpd.GeoSeries(g2)
+                # The original geopandas intersection method fails on invalid geometries
+                if not gpd_series1.is_valid.all() or not 
gpd_series2.is_valid.all():
+                    continue
+                sgpd_result = GeoSeries(g1).intersection(GeoSeries(g2))
+                gpd_result = gpd_series1.intersection(gpd_series2)
+                self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+                if len(g1) == len(g2):
+                    sgpd_result = GeoSeries(g1).intersects(GeoSeries(g2), 
align=False)
+                    gpd_result = gpd_series1.intersects(gpd_series2, 
align=False)
+                    self.check_pd_series_equal(sgpd_result, gpd_result)
+
     def test_intersection_all(self):
         pass
 
@@ -554,8 +579,11 @@ class TestMatchGeopandasSeries(TestBase):
         assert isinstance(expected, gpd.GeoSeries)
         sgpd_result = actual.to_geopandas()
         for a, e in zip(sgpd_result, expected):
+            if a is None or e is None:
+                assert a is None and e is None
+                continue
             # Sometimes sedona and geopandas both return empty geometries but of different types (e.g. Point and Polygon)
-            if a.is_empty and e.is_empty:
+            elif a.is_empty and e.is_empty:
                 continue
             self.assert_geometry_almost_equal(
                 a, e, tolerance=1e-2

Reply via email to