This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 073488d3e0 [GH-2398] Implement geopandas `union` (#2399)
073488d3e0 is described below

commit 073488d3e09fe5abee6a831a245dee4acfb9d0dd
Author: Yunchi Pang <[email protected]>
AuthorDate: Tue Oct 14 22:00:10 2025 -0700

    [GH-2398] Implement geopandas `union` (#2399)
---
 python/sedona/spark/geopandas/base.py              | 99 ++++++++++++++++++++++
 python/sedona/spark/geopandas/geoseries.py         | 12 +++
 python/tests/geopandas/test_geoseries.py           | 95 +++++++++++++++++++++
 .../tests/geopandas/test_match_geopandas_series.py | 20 +++++
 4 files changed, 226 insertions(+)

diff --git a/python/sedona/spark/geopandas/base.py 
b/python/sedona/spark/geopandas/base.py
index 5c1d12c90f..1f5d67f5a5 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -2244,6 +2244,105 @@ class GeoFrame(metaclass=ABCMeta):
         """
         return _delegate_to_geometry_column("symmetric_difference", self, 
other, align)
 
+    def union(self, other, align=None):
+        """Return a ``GeoSeries`` of the union of points in each aligned 
geometry
+        with `other`.
+
+        The operation works in a 1-to-1 row-wise manner.
+
+        Parameters
+        ----------
+        other : GeoSeries or geometric object
+            The GeoSeries (elementwise) or geometric object to find the
+            union with.
+        align : bool | None (default None)
+            If True, automatically aligns GeoSeries based on their indices.
+            If False, the order of elements is preserved. None defaults to 
True.
+
+        Returns
+        -------
+        GeoSeries
+
+        Examples
+        --------
+        >>> from sedona.spark.geopandas import GeoSeries
+        >>> from shapely.geometry import Polygon, LineString, Point
+        >>> s = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
+        ...         Polygon([(0, 0), (2, 2), (0, 2)]),
+        ...         LineString([(0, 0), (2, 2)]),
+        ...         LineString([(2, 0), (0, 2)]),
+        ...         Point(0, 1),
+        ...     ],
+        ... )
+        >>> s2 = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (1, 1), (0, 1)]),
+        ...         LineString([(1, 0), (1, 3)]),
+        ...         LineString([(2, 0), (0, 2)]),
+        ...         Point(1, 1),
+        ...         Point(0, 1),
+        ...     ],
+        ...     index=range(1, 6),
+        ... )
+
+        >>> s
+        0    POLYGON ((0 0, 2 2, 0 2, 0 0))
+        1    POLYGON ((0 0, 2 2, 0 2, 0 0))
+        2             LINESTRING (0 0, 2 2)
+        3             LINESTRING (2 0, 0 2)
+        4                       POINT (0 1)
+        dtype: geometry
+
+        >>> s2
+        1    POLYGON ((0 0, 1 1, 0 1, 0 0))
+        2             LINESTRING (1 0, 1 3)
+        3             LINESTRING (2 0, 0 2)
+        4                       POINT (1 1)
+        5                       POINT (0 1)
+        dtype: geometry
+
+        We can do union of each geometry and a single shapely geometry:
+
+        >>> s.union(Polygon([(0, 0), (1, 1), (0, 1)]))
+        0             POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
+        1             POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
+        2    GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 0...
+        3    GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 0...
+        4                       POLYGON ((0 1, 1 1, 0 0, 0 1))
+        dtype: geometry
+
+        We can also compute the union of two GeoSeries, row by row.
+        The GeoSeries above have different indices. We can either align both 
GeoSeries
+        based on index values and union elements with the same index using
+        ``align=True`` or ignore the index and union elements based on their 
matching
+        order using ``align=False``:
+
+        >>> s.union(s2, align=True)
+        0                                                 None
+        1             POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
+        2    MULTILINESTRING ((0 0, 1 1), (1 1, 2 2), (1 0,...
+        3                                LINESTRING (2 0, 0 2)
+        4                            MULTIPOINT ((0 1), (1 1))
+        5                                                 None
+        dtype: geometry
+
+        >>> s.union(s2, align=False)
+        0             POLYGON ((0 0, 0 1, 0 2, 2 2, 1 1, 0 0))
+        1    GEOMETRYCOLLECTION (POLYGON ((0 0, 0 2, 1 2, 2...
+        2    MULTILINESTRING ((0 0, 1 1), (1 1, 2 2), (2 0,...
+        3                                LINESTRING (2 0, 0 2)
+        4                                          POINT (0 1)
+        dtype: geometry
+
+        See Also
+        --------
+        GeoSeries.symmetric_difference
+        GeoSeries.difference
+        GeoSeries.intersection
+        """
+        return _delegate_to_geometry_column("union", self, other, align)
+
     def intersection_all(self):
         raise NotImplementedError("This method is not implemented yet.")
 
diff --git a/python/sedona/spark/geopandas/geoseries.py 
b/python/sedona/spark/geopandas/geoseries.py
index 7fde6aaea7..21ffd05654 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -958,6 +958,18 @@ class GeoSeries(GeoFrame, pspd.Series):
             returns_geom=True,
         )
 
+    def union(self, other, align=None) -> "GeoSeries":
+        other_series, extended = self._make_series_of_val(other)
+        align = False if extended else align
+
+        spark_expr = stf.ST_Union(F.col("L"), F.col("R"))
+        return self._row_wise_operation(
+            spark_expr,
+            other_series,
+            align=align,
+            returns_geom=True,
+        )
+
     @property
     def is_simple(self) -> pspd.Series:
         spark_expr = stf.ST_IsSimple(self.spark.column)
diff --git a/python/tests/geopandas/test_geoseries.py 
b/python/tests/geopandas/test_geoseries.py
index 2e3f786077..08e5a1779c 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -944,6 +944,101 @@ e": "Feature", "properties": {}, "geometry": {"type": 
"Point", "coordinates": [3
         df_result = s.to_geoframe().symmetric_difference(s2, align=False)
         self.check_sgpd_equals_gpd(df_result, expected)
 
+    def test_union(self):
+        s = GeoSeries(
+            [
+                Polygon([(0, 0), (2, 2), (0, 2)]),
+                Polygon([(0, 0), (2, 2), (0, 2)]),
+                LineString([(0, 0), (2, 2)]),
+                LineString([(2, 0), (0, 2)]),
+                Point(0, 1),
+            ],
+        )
+        s2 = GeoSeries(
+            [
+                Polygon([(0, 0), (1, 1), (0, 1)]),
+                LineString([(1, 0), (1, 3)]),
+                LineString([(2, 0), (0, 2)]),
+                Point(1, 1),
+                Point(0, 1),
+            ],
+            index=range(1, 6),
+        )
+
+        # Test with single geometry
+        result = s.union(Polygon([(0, 0), (1, 1), (0, 1)]))
+        expected = gpd.GeoSeries(
+            [
+                Polygon([(0, 0), (0, 1), (0, 2), (2, 2), (1, 1), (0, 0)]),
+                Polygon([(0, 0), (0, 1), (0, 2), (2, 2), (1, 1), (0, 0)]),
+                GeometryCollection(
+                    [
+                        Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
+                        LineString([(0, 0), (2, 2)]),
+                    ]
+                ),
+                GeometryCollection(
+                    [
+                        Polygon([(0, 0), (0, 1), (1, 1), (0, 0)]),
+                        LineString([(2, 0), (0, 2)]),
+                    ]
+                ),
+                Polygon([(0, 1), (1, 1), (0, 0), (0, 1)]),
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        # Test with align=True
+        result = s.union(s2, align=True)
+        expected = gpd.GeoSeries(
+            [
+                None,
+                Polygon([(0, 0), (0, 1), (0, 2), (2, 2), (1, 1), (0, 0)]),
+                MultiLineString(
+                    [
+                        LineString([(0, 0), (1, 1)]),
+                        LineString([(1, 1), (2, 2)]),
+                        LineString([(1, 0), (1, 1)]),
+                        LineString([(1, 1), (1, 3)]),
+                    ]
+                ),
+                LineString([(2, 0), (0, 2)]),
+                MultiPoint([Point(0, 1), Point(1, 1)]),
+                None,
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        # Test with align=False
+        result = s.union(s2, align=False)
+        expected = gpd.GeoSeries(
+            [
+                Polygon([(0, 0), (0, 1), (0, 2), (2, 2), (1, 1), (0, 0)]),
+                GeometryCollection(
+                    [
+                        Polygon([(0, 0), (0, 2), (1, 2), (2, 2), (1, 1), (0, 
0)]),
+                        LineString([(1, 0), (1, 1)]),
+                        LineString([(1, 1), (1, 3)]),
+                    ]
+                ),
+                MultiLineString(
+                    [
+                        LineString([(0, 0), (1, 1)]),
+                        LineString([(1, 1), (2, 2)]),
+                        LineString([(2, 0), (1, 1)]),
+                        LineString([(1, 1), (0, 2)]),
+                    ]
+                ),
+                LineString([(2, 0), (0, 2)]),
+                Point(0, 1),
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        # Check that GeoDataFrame works too
+        df_result = s.to_geoframe().union(s2, align=False)
+        self.check_sgpd_equals_gpd(df_result, expected)
+
     def test_is_simple(self):
         s = sgpd.GeoSeries(
             [
diff --git a/python/tests/geopandas/test_match_geopandas_series.py 
b/python/tests/geopandas/test_match_geopandas_series.py
index 3ec311b0d9..b1692c444f 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -586,6 +586,26 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
                 )
                 self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
 
+    def test_union(self):
+        for geom, geom2 in self.pairs:
+            # Operation doesn't work on invalid geometries
+            if (
+                not gpd.GeoSeries(geom).is_valid.all()
+                or not gpd.GeoSeries(geom2).is_valid.all()
+            ):
+                continue
+
+            sgpd_result = GeoSeries(geom).union(GeoSeries(geom2))
+            gpd_result = gpd.GeoSeries(geom).union(gpd.GeoSeries(geom2))
+            self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+            if len(geom) == len(geom2):
+                sgpd_result = GeoSeries(geom).union(GeoSeries(geom2), 
align=False)
+                gpd_result = gpd.GeoSeries(geom).union(
+                    gpd.GeoSeries(geom2), align=False
+                )
+                self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
     def test_is_simple(self):
         # 'is_simple' is meaningful only for `LineStrings` and `LinearRings`
         data = [

Reply via email to