This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 3cf4f4385a [GH-2043] Geopandas.GeoSeries: Implement is_valid_reason, make_valid (#2044)
3cf4f4385a is described below

commit 3cf4f4385a1f1b92f139e493697beb002553118c
Author: Peter Nguyen <[email protected]>
AuthorDate: Sun Jul 6 10:06:45 2025 -0700

    [GH-2043] Geopandas.GeoSeries: Implement is_valid_reason, make_valid (#2044)
    
    * Implement is_valid_reason
    
    * Implement make_valid
    
    * Skip match make_valid tests for shapely < 2.1.0
    
    * Skip match is_valid_reason for gpd version < 1.0.0
    
    * Copy over modified check_sgpd_equals_gpd
    
    * Use sedona instead of geopandas in doc strings
---
 python/sedona/geopandas/geoseries.py               | 118 +++++++++++++++++++--
 python/tests/geopandas/test_geoseries.py           |  74 ++++++++++++-
 .../tests/geopandas/test_match_geopandas_series.py |  50 ++++++++-
 3 files changed, 232 insertions(+), 10 deletions(-)

diff --git a/python/sedona/geopandas/geoseries.py b/python/sedona/geopandas/geoseries.py
index f7ad400806..146edaca8e 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -692,9 +692,50 @@ class GeoSeries(GeoFrame, pspd.Series):
             .astype("bool")
         )
 
-    def is_valid_reason(self):
-        # Implementation of the abstract method
-        raise NotImplementedError("This method is not implemented yet.")
+    def is_valid_reason(self) -> pspd.Series:
+        """Returns a ``Series`` of strings with the reason for invalidity of
+        each geometry.
+
+        Examples
+        --------
+
+        An example with one invalid polygon (a bowtie geometry crossing itself)
+        and one missing geometry:
+
+        >>> from sedona.geopandas import GeoSeries
+        >>> from shapely.geometry import Polygon
+        >>> s = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (1, 1), (0, 1)]),
+        ...         Polygon([(0,0), (1, 1), (1, 0), (0, 1)]),  # bowtie geometry
+        ...         Polygon([(0, 0), (2, 2), (2, 0)]),
+        ...         Polygon([(0, 0), (2, 0), (1, 1), (2, 2), (0, 2), (1, 1), (0, 0)]),
+        ...         None
+        ...     ]
+        ... )
+        >>> s
+        0         POLYGON ((0 0, 1 1, 0 1, 0 0))
+        1    POLYGON ((0 0, 1 1, 1 0, 0 1, 0 0))
+        2         POLYGON ((0 0, 2 2, 2 0, 0 0))
+        3                                   None
+        dtype: geometry
+
+        >>> s.is_valid_reason()
+        0    Valid Geometry
+        1    Self-intersection at or near point (0.5, 0.5, NaN)
+        2    Valid Geometry
+        3    Ring Self-intersection at or near point (1.0, 1.0)
+        4    None
+        dtype: object
+
+        See also
+        --------
+        GeoSeries.is_valid : detect invalid geometries
+        GeoSeries.make_valid : fix invalid geometries
+        """
+        return self._process_geometry_column(
+            "ST_IsValidReason", rename="is_valid_reason"
+        ).to_spark_pandas()
 
     @property
     def is_empty(self) -> pspd.Series:
@@ -915,9 +956,74 @@ class GeoSeries(GeoFrame, pspd.Series):
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
-    def make_valid(self):
-        # Implementation of the abstract method
-        raise NotImplementedError("This method is not implemented yet.")
+    def make_valid(self, *, method="linework", keep_collapsed=True) -> "GeoSeries":
+        """Repairs invalid geometries.
+
+        Returns a ``GeoSeries`` with valid geometries.
+
+        If the input geometry is already valid, then it will be preserved.
+        In many cases, in order to create a valid geometry, the input
+        geometry must be split into multiple parts or multiple geometries.
+        If the geometry must be split into multiple parts of the same type
+        to be made valid, then a multi-part geometry will be returned
+        (e.g. a MultiPolygon).
+        If the geometry must be split into multiple parts of different types
+        to be made valid, then a GeometryCollection will be returned.
+
+        In Sedona, only the 'structure' method is available:
+
+        * the 'structure' algorithm tries to reason from the structure of the
+          input to find the 'correct' repair: exterior rings bound area,
+          interior holes exclude area. It first makes all rings valid, then
+          shells are merged and holes are subtracted from the shells to
+          generate valid result. It assumes that holes and shells are correctly
+          categorized in the input geometry.
+
+        Parameters
+        ----------
+        method : {'linework', 'structure'}, default 'linework'
+            Algorithm to use when repairing geometry. Sedona Geopandas only supports the 'structure' method.
+            The default method is "linework" to match compatibility with Geopandas, but it must be explicitly set to
+            'structure' to use the Sedona implementation.
+
+        keep_collapsed : bool, default True
+            For the 'structure' method, True will keep components that have
+            collapsed into a lower dimensionality. For example, a ring
+            collapsing to a line, or a line collapsing to a point.
+
+        Examples
+        --------
+
+        >>> from sedona.geopandas import GeoSeries
+        >>> from shapely.geometry import MultiPolygon, Polygon, LineString, Point
+        >>> s = GeoSeries(
+        ...     [
+        ...         Polygon([(0, 0), (0, 2), (1, 1), (2, 2), (2, 0), (1, 1), (0, 0)]),
+        ...         Polygon([(0, 2), (0, 1), (2, 0), (0, 0), (0, 2)]),
+        ...         LineString([(0, 0), (1, 1), (1, 0)]),
+        ...     ],
+        ... )
+        >>> s
+        0    POLYGON ((0 0, 0 2, 1 1, 2 2, 2 0, 1 1, 0 0))
+        1              POLYGON ((0 2, 0 1, 2 0, 0 0, 0 2))
+        2                       LINESTRING (0 0, 1 1, 1 0)
+        dtype: geometry
+
+        >>> s.make_valid()
+        0    MULTIPOLYGON (((1 1, 0 0, 0 2, 1 1)), ((2 0, 1...
+        1                       POLYGON ((0 1, 2 0, 0 0, 0 1))
+        2                           LINESTRING (0 0, 1 1, 1 0)
+        dtype: geometry
+        """
+
+        if method != "structure":
+            raise ValueError(
+                "Sedona only supports the 'structure' method for make_valid"
+            )
+
+        col = self.get_first_geometry_column()
+        select = f"ST_MakeValid(`{col}`, {keep_collapsed})"
+        return self._query_geometry_column(select, col, rename="make_valid")
 
     def reverse(self):
         # Implementation of the abstract method
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 6aabd495dc..9d73082dfd 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -297,7 +297,28 @@ class TestGeoSeries(TestBase):
         assert_series_equal(result.to_pandas(), expected)
 
     def test_is_valid_reason(self):
-        pass
+        s = sgpd.GeoSeries(
+            [
+                Polygon([(0, 0), (1, 1), (0, 1)]),
+                Polygon([(0, 0), (1, 1), (1, 0), (0, 1)]),  # bowtie geometry
+                Polygon([(0, 0), (2, 2), (2, 0)]),
+                Polygon(
+                    [(0, 0), (2, 0), (1, 1), (2, 2), (0, 2), (1, 1), (0, 0)]
+                ),  # ring intersection
+                None,
+            ]
+        )
+        result = s.is_valid_reason().to_pandas()
+        expected = pd.Series(
+            [
+                "Valid Geometry",
+                "Self-intersection at or near point (0.5, 0.5, NaN)",
+                "Valid Geometry",
+                "Ring Self-intersection at or near point (1.0, 1.0)",
+                None,
+            ]
+        )
+        assert_series_equal(result, expected)
 
     def test_is_empty(self):
         geoseries = sgpd.GeoSeries(
@@ -416,7 +437,56 @@ class TestGeoSeries(TestBase):
         pass
 
     def test_make_valid(self):
-        pass
+        s = sgpd.GeoSeries(
+            [
+                Polygon([(0, 0), (0, 2), (1, 1), (2, 2), (2, 0), (1, 1), (0, 0)]),
+                Polygon([(0, 2), (0, 1), (2, 0), (0, 0), (0, 2)]),
+                LineString([(0, 0), (1, 1), (1, 0)]),
+            ],
+        )
+        result = s.make_valid(method="structure")
+
+        expected = gpd.GeoSeries(
+            [
+                MultiPolygon(
+                    [
+                        Polygon([(1, 1), (0, 0), (0, 2), (1, 1)]),
+                        Polygon([(2, 0), (1, 1), (2, 2), (2, 0)]),
+                    ]
+                ),
+                Polygon([(0, 1), (2, 0), (0, 0), (0, 1)]),
+                LineString([(0, 0), (1, 1), (1, 0)]),
+            ]
+        )
+
+        self.check_sgpd_equals_gpd(result, expected)
+
+        result = s.make_valid(method="structure", keep_collapsed=False)
+        expected = gpd.GeoSeries(
+            [
+                MultiPolygon(
+                    [
+                        Polygon([(1, 1), (0, 0), (0, 2), (1, 1)]),
+                        Polygon([(2, 0), (1, 1), (2, 2), (2, 0)]),
+                    ]
+                ),
+                Polygon([(0, 1), (2, 0), (0, 0), (0, 1)]),
+                LineString([(0, 0), (1, 1), (1, 0)]),
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        result = GeoSeries(
+            [Polygon([(0, 0), (1, 1), (1, 2), (1, 1), (0, 0)])]
+        ).make_valid(method="structure", keep_collapsed=True)
+        expected = gpd.GeoSeries([LineString([(0, 0), (1, 1), (1, 2), (1, 1), (0, 0)])])
+        self.check_sgpd_equals_gpd(result, expected)
+
+        result = GeoSeries(
+            [Polygon([(0, 0), (1, 1), (1, 2), (1, 1), (0, 0)])]
+        ).make_valid(method="structure", keep_collapsed=False)
+        expected = gpd.GeoSeries([Polygon()])
+        self.check_sgpd_equals_gpd(result, expected)
 
     def test_reverse(self):
         pass
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index 289da18401..3c3695aed7 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -354,7 +354,30 @@ class TestMatchGeopandasSeries(TestBase):
             self.check_pd_series_equal(sgpd_result, gpd_result)
 
     def test_is_valid_reason(self):
-        pass
+        # is_valid_reason was added in geopandas 1.0.0
+        if gpd.__version__ < "1.0.0":
+            return
+        data = [
+            Polygon([(0, 0), (1, 1), (0, 1)]),
+            Polygon([(0, 0), (1, 1), (1, 0), (0, 1)]),  # bowtie geometry
+            Polygon([(0, 0), (2, 2), (2, 0)]),
+            Polygon(
+                [(0, 0), (2, 0), (1, 1), (2, 2), (0, 2), (1, 1), (0, 0)]
+            ),  # ring intersection
+            None,
+        ]
+        sgpd_result = GeoSeries(data).is_valid_reason()
+        assert isinstance(sgpd_result, ps.Series)
+        gpd_result = gpd.GeoSeries(data).is_valid_reason()
+        for a, e in zip(sgpd_result.to_pandas(), gpd_result):
+            if a is None and e is None:
+                continue
+            if a == "Valid Geometry":
+                assert e == "Valid Geometry"
+            elif "Self-intersection" in a:
+                assert "Self-intersection" in e
+            else:
+                raise ValueError(f"Unexpected result: {a} not equivalent to {e}")
 
     def test_is_empty(self):
         for _, geom in self.geoms:
@@ -463,7 +486,30 @@ class TestMatchGeopandasSeries(TestBase):
         pass
 
     def test_make_valid(self):
-        pass
+        import shapely
+
+        # 'structure' method requires shapely >= 2.1.0
+        if shapely.__version__ < "2.1.0":
+            return
+        for _, geom in self.geoms:
+            sgpd_result = GeoSeries(geom).make_valid(method="structure")
+            gpd_result = gpd.GeoSeries(geom).make_valid(method="structure")
+            self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+        for _, geom in self.geoms:
+            sgpd_result = GeoSeries(geom).make_valid(
+                method="structure", keep_collapsed=False
+            )
+            gpd_result = gpd.GeoSeries(geom).make_valid(
+                method="structure", keep_collapsed=False
+            )
+            self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+        # Ensure default method="linework" fails
+        with pytest.raises(ValueError):
+            GeoSeries([Point(0, 0)]).make_valid()
+        with pytest.raises(ValueError):
+            GeoSeries([Point(0, 0)]).make_valid(method="linework")
 
     def test_reverse(self):
         pass

Reply via email to