This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new b535bc4b5c [GH-2116] Geopandas.GeoSeries: Implement `difference` and 
`dwithin` (#2117)
b535bc4b5c is described below

commit b535bc4b5c3745d209910c0af9622c24f587ef70
Author: Peter Nguyen <[email protected]>
AuthorDate: Sun Jul 20 22:56:53 2025 -0700

    [GH-2116] Geopandas.GeoSeries: Implement `difference` and `dwithin` (#2117)
    
    * Implement functions
    
    * Skip covers and covered_by tests for old versions
    
    * Skip covered by for shapely < 2
    
    * Implement dwithin and difference, and fix index bug in row_wise_operations
---
 python/sedona/geopandas/geoseries.py               | 246 ++++++++++++++++++++-
 python/tests/geopandas/test_geoseries.py           |  95 ++++++++
 .../tests/geopandas/test_match_geopandas_series.py |  49 ++++
 3 files changed, 384 insertions(+), 6 deletions(-)

diff --git a/python/sedona/geopandas/geoseries.py 
b/python/sedona/geopandas/geoseries.py
index e9fe4d3795..f038b9693c 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -1131,6 +1131,232 @@ class GeoSeries(GeoFrame, pspd.Series):
             )
         )
 
+    def dwithin(self, other, distance, align=None):
+        """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+        each aligned geometry that is within a set distance from ``other``.
+
+        The operation works in a 1-to-1 row-wise manner.
+
+        Parameters
+        ----------
+        other : GeoSeries or geometric object
+            The GeoSeries (elementwise) or geometric object to test for
+            being within the given distance.
+        distance : float, np.array, pd.Series
+            Distance to test if each geometry is within. A scalar distance is
+            applied to all geometries. Array-like distances (np.array,
+            pd.Series) are not implemented yet and raise NotImplementedError;
+            only a scalar float or int is currently accepted.
+        align : bool | None (default None)
+            If True, automatically aligns GeoSeries based on their indices.
+            If False, the order of elements is preserved. None defaults to 
True.
+
+        Returns
+        -------
+        Series (bool)
+
+        Examples
+        --------
+        >>> from sedona.geopandas import GeoSeries
+        >>> from shapely.geometry import Polygon, LineString, Point
+        >>> s = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (1, 1), (0, 1)]),
+        ...         LineString([(0, 0), (0, 2)]),
+        ...         LineString([(0, 0), (0, 1)]),
+        ...         Point(0, 1),
+        ...     ],
+        ...     index=range(0, 4),
+        ... )
+        >>> s2 = GeoSeries(
+        ...     [
+        ...         Polygon([(1, 0), (4, 2), (2, 2)]),
+        ...         Polygon([(2, 0), (3, 2), (2, 2)]),
+        ...         LineString([(2, 0), (2, 2)]),
+        ...         Point(1, 1),
+        ...     ],
+        ...     index=range(1, 5),
+        ... )
+
+        >>> s
+        0    POLYGON ((0 0, 1 1, 0 1, 0 0))
+        1             LINESTRING (0 0, 0 2)
+        2             LINESTRING (0 0, 0 1)
+        3                       POINT (0 1)
+        dtype: geometry
+
+        >>> s2
+        1    POLYGON ((1 0, 4 2, 2 2, 1 0))
+        2    POLYGON ((2 0, 3 2, 2 2, 2 0))
+        3             LINESTRING (2 0, 2 2)
+        4                       POINT (1 1)
+        dtype: geometry
+
+        We can check if each geometry of GeoSeries is within a set distance
+        of a single geometry:
+
+        >>> point = Point(0, 1)
+        >>> s2.dwithin(point, 1.8)
+        1     True
+        2    False
+        3    False
+        4     True
+        dtype: bool
+
+        We can also check two GeoSeries against each other, row by row.
+        The GeoSeries above have different indices. We can either align both 
GeoSeries
+        based on index values and compare elements with the same index using
+        ``align=True`` or ignore index and compare elements based on their 
matching
+        order using ``align=False``:
+
+        >>> s.dwithin(s2, distance=1, align=True)
+        0    False
+        1     True
+        2    False
+        3    False
+        4    False
+        dtype: bool
+
+        >>> s.dwithin(s2, distance=1, align=False)
+        0     True
+        1    False
+        2    False
+        3     True
+        dtype: bool
+
+        Notes
+        -----
+        This method works in a row-wise manner. It does not check if an element
+        of one GeoSeries is within the set distance of *any* element of the 
other one.
+
+        See Also
+        --------
+        GeoSeries.within
+        """
+
+        if not isinstance(distance, (float, int)):
+            raise NotImplementedError(
+                "Array-like distance for dwithin not implemented yet."
+            )
+
+        return self._row_wise_operation(
+            f"ST_DWithin(`L`, `R`, {distance})",
+            other,
+            align,
+            rename="dwithin",
+            returns_geom=False,
+            default_val="FALSE",
+        )
+
+    def difference(self, other, align=None) -> "GeoSeries":
+        """Returns a ``GeoSeries`` of the points in each aligned geometry that
+        are not in `other`.
+
+        The operation works in a 1-to-1 row-wise manner.
+
+        Unlike Geopandas, Sedona does not support this operation for 
GeometryCollections.
+
+        Parameters
+        ----------
+        other : GeoSeries or geometric object
+            The GeoSeries (elementwise) or geometric object to find the
+            difference to.
+        align : bool | None (default None)
+            If True, automatically aligns GeoSeries based on their indices.
+            If False, the order of elements is preserved. None defaults to True.
+
+        Returns
+        -------
+        GeoSeries
+
+        Examples
+        --------
+        >>> from sedona.geopandas import GeoSeries
+        >>> from shapely.geometry import Polygon, LineString, Point
+        >>> s = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
+        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
+        ...         LineString([(0, 0), (2, 2)]),
+        ...         LineString([(2, 0), (0, 2)]),
+        ...         Point(0, 1),
+        ...     ],
+        ... )
+        >>> s2 = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (1, 1), (0, 1)]),
+        ...         LineString([(1, 0), (1, 3)]),
+        ...         LineString([(2, 0), (0, 2)]),
+        ...         Point(1, 1),
+        ...         Point(0, 1),
+        ...     ],
+        ...     index=range(1, 6),
+        ... )
+
+        >>> s
+        0    POLYGON ((0 0, 2 2, 0 2, 0 0))
+        1    POLYGON ((0 0, 2 2, 0 2, 0 0))
+        2             LINESTRING (0 0, 2 2)
+        3             LINESTRING (2 0, 0 2)
+        4                       POINT (0 1)
+        dtype: geometry
+
+        >>> s2
+        1    POLYGON ((0 0, 1 1, 0 1, 0 0))
+        2             LINESTRING (1 0, 1 3)
+        3             LINESTRING (2 0, 0 2)
+        4                       POINT (1 1)
+        5                       POINT (0 1)
+        dtype: geometry
+
+        We can do difference of each geometry and a single
+        shapely geometry:
+
+        >>> s.difference(Polygon([(0, 0), (1, 1), (0, 1)]))
+        0         POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+        1         POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+        2                       LINESTRING (1 1, 2 2)
+        3    MULTILINESTRING ((2 0, 1 1), (1 1, 0 2))
+        4                                 POINT EMPTY
+        dtype: geometry
+
+        We can also check two GeoSeries against each other, row by row.
+        The GeoSeries above have different indices. We can either align both 
GeoSeries
+        based on index values and compare elements with the same index using
+        ``align=True`` or ignore index and compare elements based on their 
matching
+        order using ``align=False``:
+
+        >>> s.difference(s2, align=True)
+        0                                        None
+        1         POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+        2    MULTILINESTRING ((0 0, 1 1), (1 1, 2 2))
+        3                            LINESTRING EMPTY
+        4                                 POINT (0 1)
+        5                                        None
+        dtype: geometry
+
+        >>> s.difference(s2, align=False)
+        0         POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+        1    POLYGON ((0 0, 0 2, 1 2, 2 2, 1 1, 0 0))
+        2    MULTILINESTRING ((0 0, 1 1), (1 1, 2 2))
+        3                       LINESTRING (2 0, 0 2)
+        4                                 POINT EMPTY
+        dtype: geometry
+
+        See Also
+        --------
+        GeoSeries.symmetric_difference
+        GeoSeries.union
+        GeoSeries.intersection
+        """
+        return self._row_wise_operation(
+            "ST_Difference(`L`, `R`)",
+            other,
+            align,
+            rename="difference",
+            returns_geom=True,
+        )
+
     @property
     def is_simple(self) -> pspd.Series:
         """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
@@ -2698,14 +2924,22 @@ class GeoSeries(GeoFrame, pspd.Series):
             NATURAL_ORDER_COLUMN_NAME if align is False else 
SPARK_DEFAULT_INDEX_NAME
         )
 
-        if isinstance(other, BaseGeometry):
-            other = GeoSeries([other] * len(self))
-
-        # e.g int input
         if not isinstance(other, pspd.Series):
-            other = pspd.Series([other] * len(self))
+            # Materialize one copy of the scalar `other` per row of self (an in-memory list)
+            data = [other for _ in range(len(self))]
+
+            # e.g int, Geom, etc
+            other = (
+                GeoSeries(data)
+                if isinstance(other, BaseGeometry)
+                else pspd.Series(data)
+            )
 
-        assert isinstance(other, pspd.Series), f"Invalid type for other: 
{type(other)}"
+            # To make sure the result is the same length, we set natural 
column as the index
+            # in case the index is not the default range index from 0.
+            # Alternatively, we could create 'other' using the same index as 
self,
+            # but that would require index=self.index.to_pandas() which is 
less scalable.
+            index_col = NATURAL_ORDER_COLUMN_NAME
 
         # This code assumes there is only one index (SPARK_DEFAULT_INDEX_NAME)
         # and would need to be updated if Sedona later supports multi-index
diff --git a/python/tests/geopandas/test_geoseries.py 
b/python/tests/geopandas/test_geoseries.py
index 2db3ed317d..e3621da080 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -526,6 +526,101 @@ class TestGeoSeries(TestGeopandasBase):
     def test_count_interior_rings(self):
         pass
 
+    def test_dwithin(self):
+        s = GeoSeries(
+            [
+                Polygon([(0, 0), (1, 1), (0, 1)]),
+                LineString([(0, 0), (0, 2)]),
+                LineString([(0, 0), (0, 1)]),
+                Point(0, 1),
+            ],
+            index=range(0, 4),
+        )
+        s2 = GeoSeries(
+            [
+                Polygon([(1, 0), (4, 2), (2, 2)]),
+                Polygon([(2, 0), (3, 2), (2, 2)]),
+                LineString([(2, 0), (2, 2)]),
+                Point(1, 1),
+            ],
+            index=range(1, 5),
+        )
+
+        result = s2.dwithin(Point(0, 1), 1.8)
+        expected = pd.Series([True, False, False, True], index=range(1, 5))
+        assert_series_equal(result.to_pandas(), expected)
+
+        result = s.dwithin(s2, distance=1, align=True)
+        expected = pd.Series([False, True, False, False, False])
+
+        result = s.dwithin(s2, distance=1, align=False)
+        expected = pd.Series([True, False, False, True])
+        assert_series_equal(result.to_pandas(), expected)
+
+    def test_difference(self):
+        s = GeoSeries(
+            [
+                Polygon([(0, 0), (2, 2), (0, 2)]),
+                Polygon([(0, 0), (2, 2), (0, 2)]),
+                LineString([(0, 0), (2, 2)]),
+                LineString([(2, 0), (0, 2)]),
+                Point(0, 1),
+            ],
+        )
+        s2 = GeoSeries(
+            [
+                Polygon([(0, 0), (1, 1), (0, 1)]),
+                LineString([(1, 0), (1, 3)]),
+                LineString([(2, 0), (0, 2)]),
+                Point(1, 1),
+                Point(0, 1),
+            ],
+            index=range(1, 6),
+        )
+
+        result = s.difference(Polygon([(0, 0), (1, 1), (0, 1)]))
+        expected = gpd.GeoSeries(
+            [
+                Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+                Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+                LineString([(1, 1), (2, 2)]),
+                MultiLineString(
+                    [LineString([(2, 0), (1, 1)]), LineString([(1, 1), (0, 
2)])]
+                ),
+                Point(),
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        result = s.difference(s2, align=True)
+        expected = gpd.GeoSeries(
+            [
+                None,
+                Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+                MultiLineString(
+                    [LineString([(0, 0), (1, 1)]), LineString([(1, 1), (2, 
2)])]
+                ),
+                LineString(),
+                Point(0, 1),
+                None,
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        result = s.difference(s2, align=False)
+        expected = gpd.GeoSeries(
+            [
+                None,
+                Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+                Polygon([(0, 0), (0, 2), (1, 2), (2, 2), (1, 1), (0, 0)]),
+                MultiLineString(
+                    [LineString([(0, 0), (1, 1)]), LineString([(1, 1), (2, 
2)])]
+                ),
+                LineString([(2, 0), (0, 2)]),
+                Point(),
+            ]
+        )
+
     def test_is_simple(self):
         s = sgpd.GeoSeries(
             [
diff --git a/python/tests/geopandas/test_match_geopandas_series.py 
b/python/tests/geopandas/test_match_geopandas_series.py
index cfc1b587ba..a23d9db1f4 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -478,6 +478,55 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
     def test_count_interior_rings(self):
         pass
 
+    def test_dwithin(self):
+        if parse_version(gpd.__version__) < parse_version("1.0.0"):
+            pytest.skip("geopandas < 1.0.0 does not support dwithin")
+
+        for i, (_, geom) in enumerate(self.geoms):
+            for _, geom2 in self.geoms[i:]:
+                sgpd_result = GeoSeries(geom).dwithin(GeoSeries(geom2), 
distance=1)
+                gpd_result = gpd.GeoSeries(geom).dwithin(
+                    gpd.GeoSeries(geom2), distance=1
+                )
+                self.check_pd_series_equal(sgpd_result, gpd_result)
+
+                if len(geom) == len(geom2):
+                    sgpd_result = GeoSeries(geom).dwithin(
+                        GeoSeries(geom2), distance=1, align=False
+                    )
+                    gpd_result = gpd.GeoSeries(geom).dwithin(
+                        gpd.GeoSeries(geom2), distance=1, align=False
+                    )
+                    self.check_pd_series_equal(sgpd_result, gpd_result)
+
+    def test_difference(self):
+        for i, (_, geom) in enumerate(self.geoms):
+            for _, geom2 in self.geoms[i:]:
+                # Sedona doesn't support difference for GeometryCollections
+                if isinstance(geom[0], GeometryCollection) or isinstance(
+                    geom2[0], GeometryCollection
+                ):
+                    continue
+                # Operation doesn't work on invalid geometries
+                if (
+                    not gpd.GeoSeries(geom).is_valid.all()
+                    or not gpd.GeoSeries(geom2).is_valid.all()
+                ):
+                    continue
+
+                sgpd_result = GeoSeries(geom).difference(GeoSeries(geom2))
+                gpd_result = 
gpd.GeoSeries(geom).difference(gpd.GeoSeries(geom2))
+                self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+                if len(geom) == len(geom2):
+                    sgpd_result = GeoSeries(geom).difference(
+                        GeoSeries(geom2), align=False
+                    )
+                    gpd_result = gpd.GeoSeries(geom).difference(
+                        gpd.GeoSeries(geom2), align=False
+                    )
+                    self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
     def test_is_simple(self):
         data = [
             LineString([(0, 0), (0, 0)]),

Reply via email to