This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 69aff58410 [GH-2394] Implement `symmetric_difference` (#2395)
69aff58410 is described below

commit 69aff5841000efdbe152e0141d5fe75d55e5a7f4
Author: Yunchi Pang <[email protected]>
AuthorDate: Mon Oct 13 21:10:23 2025 -0700

    [GH-2394] Implement `symmetric_difference` (#2395)
    
    Co-authored-by: Peter Nguyen <[email protected]>
---
 python/sedona/spark/geopandas/base.py              | 104 +++++++++++++++++++++
 python/sedona/spark/geopandas/geoseries.py         |  12 +++
 python/tests/geopandas/test_geoseries.py           |  96 +++++++++++++++++++
 .../tests/geopandas/test_match_geopandas_series.py |  22 +++++
 4 files changed, 234 insertions(+)

diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py
index 912b3c35f4..5c1d12c90f 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -2140,6 +2140,110 @@ class GeoFrame(metaclass=ABCMeta):
         """
         return _delegate_to_geometry_column("difference", self, other, align)
 
+    def symmetric_difference(self, other, align=None):
+        """Return a ``GeoSeries`` of the symmetric difference of points in
+        each aligned geometry with `other`.
+
+        For each geometry, the symmetric difference consists of points in the
+        geometry not in `other`, and points in `other` not in the geometry.
+
+        The operation works on a 1-to-1 row-wise manner.
+
+        Parameters
+        ----------
+        other : Geoseries or geometric object
+            The Geoseries (elementwise) or geometric object to find the
+            symmetric difference to.
+        align : bool | None (default None)
+            If True, automatically aligns GeoSeries based on their indices.
+            If False, the order of elements is preserved. None defaults to True.
+
+        Returns
+        -------
+        GeoSeries
+
+        Examples
+        --------
+        >>> from sedona.spark.geopandas import GeoSeries
+        >>> from shapely.geometry import Polygon, LineString, Point
+        >>> s = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
+        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
+        ...         LineString([(0, 0), (2, 2)]),
+        ...         LineString([(2, 0), (0, 2)]),
+        ...         Point(0, 1),
+        ...     ],
+        ... )
+        >>> s2 = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (1, 1), (0, 1)]),
+        ...         LineString([(1, 0), (1, 3)]),
+        ...         LineString([(2, 0), (0, 2)]),
+        ...         Point(1, 1),
+        ...         Point(0, 1),
+        ...     ],
+        ...     index=range(1, 6),
+        ... )
+
+        >>> s
+        0    POLYGON ((0 0, 2 2, 0 2, 0 0))
+        1    POLYGON ((0 0, 2 2, 0 2, 0 0))
+        2             LINESTRING (0 0, 2 2)
+        3             LINESTRING (2 0, 0 2)
+        4                       POINT (0 1)
+        dtype: geometry
+
+        >>> s2
+        1    POLYGON ((0 0, 1 1, 0 1, 0 0))
+        2             LINESTRING (1 0, 1 3)
+        3             LINESTRING (2 0, 0 2)
+        4                       POINT (1 1)
+        5                       POINT (0 1)
+        dtype: geometry
+
+        We can do symmetric difference of each geometry and a single
+        shapely geometry:
+
+        >>> s.symmetric_difference(Polygon([(0, 0), (1, 1), (0, 1)]))
+        0                  POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+        1                  POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+        2    GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 0...
+        3    GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 0...
+        4                       POLYGON ((0 1, 1 1, 0 0, 0 1))
+        dtype: geometry
+
+        We can also check two GeoSeries against each other, row by row.
+        The GeoSeries above have different indices. We can either align both GeoSeries
+        based on index values and compare elements with the same index using
+        ``align=True`` or ignore index and compare elements based on their matching
+        order using ``align=False``:
+
+        >>> s.symmetric_difference(s2, align=True)
+        0                                                 None
+        1                  POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+        2    MULTILINESTRING ((0 0, 1 1), (1 1, 2 2), (1 0,...
+        3                                     LINESTRING EMPTY
+        4                            MULTIPOINT ((0 1), (1 1))
+        5                                                 None
+        dtype: geometry
+
+        >>> s.symmetric_difference(s2, align=False)
+        0                  POLYGON ((0 2, 2 2, 1 1, 0 1, 0 2))
+        1    GEOMETRYCOLLECTION (POLYGON ((0 0, 0 2, 1 2, 2...
+        2    MULTILINESTRING ((0 0, 1 1), (1 1, 2 2), (2 0,...
+        3                                LINESTRING (2 0, 0 2)
+        4                                          POINT EMPTY
+        dtype: geometry
+
+        See also
+        --------
+        GeoSeries.difference
+        GeoSeries.union
+        GeoSeries.intersection
+        """
+        return _delegate_to_geometry_column("symmetric_difference", self, other, align)
+
     def intersection_all(self):
         raise NotImplementedError("This method is not implemented yet.")
 
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index 361b46b197..7fde6aaea7 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -946,6 +946,18 @@ class GeoSeries(GeoFrame, pspd.Series):
             returns_geom=True,
         )
 
+    def symmetric_difference(self, other, align=None) -> "GeoSeries":
+        other_series, extended = self._make_series_of_val(other)
+        align = False if extended else align
+
+        spark_expr = stf.ST_SymDifference(F.col("L"), F.col("R"))
+        return self._row_wise_operation(
+            spark_expr,
+            other_series,
+            align=align,
+            returns_geom=True,
+        )
+
     @property
     def is_simple(self) -> pspd.Series:
         spark_expr = stf.ST_IsSimple(self.spark.column)
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index d9c5fd7db3..2e3f786077 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -848,6 +848,102 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
         df_result = s.to_geoframe().difference(s2, align=False)
         self.check_sgpd_equals_gpd(df_result, expected)
 
+    def test_symmetric_difference(self):
+        s = GeoSeries(
+            [
+                Polygon([(0, 0), (2, 2), (0, 2)]),
+                Polygon([(0, 0), (2, 2), (0, 2)]),
+                LineString([(0, 0), (2, 2)]),
+                LineString([(2, 0), (0, 2)]),
+                Point(0, 1),
+            ],
+        )
+        s2 = GeoSeries(
+            [
+                Polygon([(0, 0), (1, 1), (0, 1)]),
+                LineString([(1, 0), (1, 3)]),
+                LineString([(2, 0), (0, 2)]),
+                Point(1, 1),
+                Point(0, 1),
+            ],
+            index=range(1, 6),
+        )
+
+        # Test with single geometry
+        result = s.symmetric_difference(Polygon([(0, 0), (1, 1), (0, 1)]))
+        expected = gpd.GeoSeries(
+            [
+                Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+                Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+                GeometryCollection(
+                    [
+                        Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
+                        LineString([(1, 1), (2, 2)]),
+                    ]
+                ),
+                GeometryCollection(
+                    [
+                        Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
+                        LineString([(2, 0), (1, 1)]),
+                        LineString([(1, 1), (0, 2)]),
+                    ]
+                ),
+                Polygon([(0, 1), (1, 1), (0, 0), (0, 1)]),
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        # Test with align=True
+        result = s.symmetric_difference(s2, align=True)
+        expected = gpd.GeoSeries(
+            [
+                None,
+                Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+                MultiLineString(
+                    [
+                        LineString([(0, 0), (1, 1)]),
+                        LineString([(1, 1), (2, 2)]),
+                        LineString([(1, 0), (1, 1)]),
+                        LineString([(1, 1), (1, 3)]),
+                    ]
+                ),
+                LineString(),
+                MultiPoint([Point(0, 1), Point(1, 1)]),
+                None,
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        # Test with align=False
+        result = s.symmetric_difference(s2, align=False)
+        expected = gpd.GeoSeries(
+            [
+                Polygon([(0, 2), (2, 2), (1, 1), (0, 1), (0, 2)]),
+                GeometryCollection(
+                    [
+                        Polygon([(0, 0), (0, 2), (1, 2), (2, 2), (1, 1), (0, 0)]),
+                        LineString([(1, 0), (1, 1)]),
+                        LineString([(1, 1), (1, 3)]),
+                    ]
+                ),
+                MultiLineString(
+                    [
+                        LineString([(0, 0), (1, 1)]),
+                        LineString([(1, 1), (2, 2)]),
+                        LineString([(2, 0), (1, 1)]),
+                        LineString([(1, 1), (0, 2)]),
+                    ]
+                ),
+                LineString([(2, 0), (0, 2)]),
+                Point(),
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        # Check that GeoDataFrame works too
+        df_result = s.to_geoframe().symmetric_difference(s2, align=False)
+        self.check_sgpd_equals_gpd(df_result, expected)
+
     def test_is_simple(self):
         s = sgpd.GeoSeries(
             [
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index ebbc699ca3..3ec311b0d9 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -564,6 +564,28 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
                 )
                 self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
 
+    def test_symmetric_difference(self):
+        for geom, geom2 in self.pairs:
+            # Operation doesn't work on invalid geometries
+            if (
+                not gpd.GeoSeries(geom).is_valid.all()
+                or not gpd.GeoSeries(geom2).is_valid.all()
+            ):
+                continue
+
+            sgpd_result = GeoSeries(geom).symmetric_difference(GeoSeries(geom2))
+            gpd_result = gpd.GeoSeries(geom).symmetric_difference(gpd.GeoSeries(geom2))
+            self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+            if len(geom) == len(geom2):
+                sgpd_result = GeoSeries(geom).symmetric_difference(
+                    GeoSeries(geom2), align=False
+                )
+                gpd_result = gpd.GeoSeries(geom).symmetric_difference(
+                    gpd.GeoSeries(geom2), align=False
+                )
+                self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
     def test_is_simple(self):
         # 'is_simple' is meaningful only for `LineStrings` and `LinearRings`
         data = [

Reply via email to