This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 3cf4f4385a [GH-2043] Geopandas.GeoSeries: Implement is_valid_reason, make_valid (#2044)
3cf4f4385a is described below
commit 3cf4f4385a1f1b92f139e493697beb002553118c
Author: Peter Nguyen <[email protected]>
AuthorDate: Sun Jul 6 10:06:45 2025 -0700
[GH-2043] Geopandas.GeoSeries: Implement is_valid_reason, make_valid (#2044)
* Implement is_valid_reason
* Implement make_valid
* Skip match make_valid tests for shapely < 2.1.0
* Skip match is_valid_reason for gpd version < 1.0.0
* Copy over modified check_sgpd_equals_gpd
* Use sedona instead of geopandas in doc strings
---
python/sedona/geopandas/geoseries.py | 118 +++++++++++++++++++--
python/tests/geopandas/test_geoseries.py | 74 ++++++++++++-
.../tests/geopandas/test_match_geopandas_series.py | 50 ++++++++-
3 files changed, 232 insertions(+), 10 deletions(-)
diff --git a/python/sedona/geopandas/geoseries.py b/python/sedona/geopandas/geoseries.py
index f7ad400806..146edaca8e 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -692,9 +692,50 @@ class GeoSeries(GeoFrame, pspd.Series):
.astype("bool")
)
- def is_valid_reason(self):
- # Implementation of the abstract method
- raise NotImplementedError("This method is not implemented yet.")
+ def is_valid_reason(self) -> pspd.Series:
+ """Returns a ``Series`` of strings with the reason for invalidity of
+ each geometry.
+
+ Examples
+ --------
+
+ An example with one invalid polygon (a bowtie geometry crossing itself)
+ and one missing geometry:
+
+ >>> from sedona.geopandas import GeoSeries
+ >>> from shapely.geometry import Polygon
+ >>> s = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (1, 1), (0, 1)]),
+ ... Polygon([(0,0), (1, 1), (1, 0), (0, 1)]), # bowtie geometry
+ ... Polygon([(0, 0), (2, 2), (2, 0)]),
+ ... Polygon([(0, 0), (2, 0), (1, 1), (2, 2), (0, 2), (1, 1), (0, 0)]),
+ ... None
+ ... ]
+ ... )
+ >>> s
+ 0 POLYGON ((0 0, 1 1, 0 1, 0 0))
+ 1 POLYGON ((0 0, 1 1, 1 0, 0 1, 0 0))
+ 2 POLYGON ((0 0, 2 2, 2 0, 0 0))
+ 3 None
+ dtype: geometry
+
+ >>> s.is_valid_reason()
+ 0 Valid Geometry
+ 1 Self-intersection at or near point (0.5, 0.5, NaN)
+ 2 Valid Geometry
+ 3 Ring Self-intersection at or near point (1.0, 1.0)
+ 4 None
+ dtype: object
+
+ See also
+ --------
+ GeoSeries.is_valid : detect invalid geometries
+ GeoSeries.make_valid : fix invalid geometries
+ """
+ return self._process_geometry_column(
+ "ST_IsValidReason", rename="is_valid_reason"
+ ).to_spark_pandas()
@property
def is_empty(self) -> pspd.Series:
@@ -915,9 +956,74 @@ class GeoSeries(GeoFrame, pspd.Series):
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")
- def make_valid(self):
- # Implementation of the abstract method
- raise NotImplementedError("This method is not implemented yet.")
+ def make_valid(self, *, method="linework", keep_collapsed=True) -> "GeoSeries":
+ """Repairs invalid geometries.
+
+ Returns a ``GeoSeries`` with valid geometries.
+
+ If the input geometry is already valid, then it will be preserved.
+ In many cases, in order to create a valid geometry, the input
+ geometry must be split into multiple parts or multiple geometries.
+ If the geometry must be split into multiple parts of the same type
+ to be made valid, then a multi-part geometry will be returned
+ (e.g. a MultiPolygon).
+ If the geometry must be split into multiple parts of different types
+ to be made valid, then a GeometryCollection will be returned.
+
+ In Sedona, only the 'structure' method is available:
+
+ * the 'structure' algorithm tries to reason from the structure of the
+ input to find the 'correct' repair: exterior rings bound area,
+ interior holes exclude area. It first makes all rings valid, then
+ shells are merged and holes are subtracted from the shells to
+ generate valid result. It assumes that holes and shells are correctly
+ categorized in the input geometry.
+
+ Parameters
+ ----------
+ method : {'linework', 'structure'}, default 'linework'
+ Algorithm to use when repairing geometry. Sedona Geopandas only supports the 'structure' method.
+ The default is 'linework' for compatibility with the Geopandas signature, so it must be explicitly set to
+ 'structure' to use the Sedona implementation.
+
+ keep_collapsed : bool, default True
+ For the 'structure' method, True will keep components that have
+ collapsed into a lower dimensionality. For example, a ring
+ collapsing to a line, or a line collapsing to a point.
+
+ Examples
+ --------
+
+ >>> from sedona.geopandas import GeoSeries
+ >>> from shapely.geometry import MultiPolygon, Polygon, LineString, Point
+ >>> s = GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (0, 2), (1, 1), (2, 2), (2, 0), (1, 1), (0, 0)]),
+ ... Polygon([(0, 2), (0, 1), (2, 0), (0, 0), (0, 2)]),
+ ... LineString([(0, 0), (1, 1), (1, 0)]),
+ ... ],
+ ... )
+ >>> s
+ 0 POLYGON ((0 0, 0 2, 1 1, 2 2, 2 0, 1 1, 0 0))
+ 1 POLYGON ((0 2, 0 1, 2 0, 0 0, 0 2))
+ 2 LINESTRING (0 0, 1 1, 1 0)
+ dtype: geometry
+
+ >>> s.make_valid(method="structure")
+ 0 MULTIPOLYGON (((1 1, 0 0, 0 2, 1 1)), ((2 0, 1...
+ 1 POLYGON ((0 1, 2 0, 0 0, 0 1))
+ 2 LINESTRING (0 0, 1 1, 1 0)
+ dtype: geometry
+ """
+
+ if method != "structure":
+ raise ValueError(
+ "Sedona only supports the 'structure' method for make_valid"
+ )
+
+ col = self.get_first_geometry_column()
+ select = f"ST_MakeValid(`{col}`, {keep_collapsed})"
+ return self._query_geometry_column(select, col, rename="make_valid")
def reverse(self):
# Implementation of the abstract method
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 6aabd495dc..9d73082dfd 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -297,7 +297,28 @@ class TestGeoSeries(TestBase):
assert_series_equal(result.to_pandas(), expected)
def test_is_valid_reason(self):
- pass
+ s = sgpd.GeoSeries(
+ [
+ Polygon([(0, 0), (1, 1), (0, 1)]),
+ Polygon([(0, 0), (1, 1), (1, 0), (0, 1)]), # bowtie geometry
+ Polygon([(0, 0), (2, 2), (2, 0)]),
+ Polygon(
+ [(0, 0), (2, 0), (1, 1), (2, 2), (0, 2), (1, 1), (0, 0)]
+ ), # ring intersection
+ None,
+ ]
+ )
+ result = s.is_valid_reason().to_pandas()
+ expected = pd.Series(
+ [
+ "Valid Geometry",
+ "Self-intersection at or near point (0.5, 0.5, NaN)",
+ "Valid Geometry",
+ "Ring Self-intersection at or near point (1.0, 1.0)",
+ None,
+ ]
+ )
+ assert_series_equal(result, expected)
def test_is_empty(self):
geoseries = sgpd.GeoSeries(
@@ -416,7 +437,56 @@ class TestGeoSeries(TestBase):
pass
def test_make_valid(self):
- pass
+ s = sgpd.GeoSeries(
+ [
+ Polygon([(0, 0), (0, 2), (1, 1), (2, 2), (2, 0), (1, 1), (0, 0)]),
+ Polygon([(0, 2), (0, 1), (2, 0), (0, 0), (0, 2)]),
+ LineString([(0, 0), (1, 1), (1, 0)]),
+ ],
+ )
+ result = s.make_valid(method="structure")
+
+ expected = gpd.GeoSeries(
+ [
+ MultiPolygon(
+ [
+ Polygon([(1, 1), (0, 0), (0, 2), (1, 1)]),
+ Polygon([(2, 0), (1, 1), (2, 2), (2, 0)]),
+ ]
+ ),
+ Polygon([(0, 1), (2, 0), (0, 0), (0, 1)]),
+ LineString([(0, 0), (1, 1), (1, 0)]),
+ ]
+ )
+
+ self.check_sgpd_equals_gpd(result, expected)
+
+ result = s.make_valid(method="structure", keep_collapsed=False)
+ expected = gpd.GeoSeries(
+ [
+ MultiPolygon(
+ [
+ Polygon([(1, 1), (0, 0), (0, 2), (1, 1)]),
+ Polygon([(2, 0), (1, 1), (2, 2), (2, 0)]),
+ ]
+ ),
+ Polygon([(0, 1), (2, 0), (0, 0), (0, 1)]),
+ LineString([(0, 0), (1, 1), (1, 0)]),
+ ]
+ )
+ self.check_sgpd_equals_gpd(result, expected)
+
+ result = GeoSeries(
+ [Polygon([(0, 0), (1, 1), (1, 2), (1, 1), (0, 0)])]
+ ).make_valid(method="structure", keep_collapsed=True)
+ expected = gpd.GeoSeries([LineString([(0, 0), (1, 1), (1, 2), (1, 1), (0, 0)])])
+ self.check_sgpd_equals_gpd(result, expected)
+
+ result = GeoSeries(
+ [Polygon([(0, 0), (1, 1), (1, 2), (1, 1), (0, 0)])]
+ ).make_valid(method="structure", keep_collapsed=False)
+ expected = gpd.GeoSeries([Polygon()])
+ self.check_sgpd_equals_gpd(result, expected)
def test_reverse(self):
pass
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index 289da18401..3c3695aed7 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -354,7 +354,30 @@ class TestMatchGeopandasSeries(TestBase):
self.check_pd_series_equal(sgpd_result, gpd_result)
def test_is_valid_reason(self):
- pass
+ # is_valid_reason was added in geopandas 1.0.0
+ if gpd.__version__ < "1.0.0":
+ return
+ data = [
+ Polygon([(0, 0), (1, 1), (0, 1)]),
+ Polygon([(0, 0), (1, 1), (1, 0), (0, 1)]), # bowtie geometry
+ Polygon([(0, 0), (2, 2), (2, 0)]),
+ Polygon(
+ [(0, 0), (2, 0), (1, 1), (2, 2), (0, 2), (1, 1), (0, 0)]
+ ), # ring intersection
+ None,
+ ]
+ sgpd_result = GeoSeries(data).is_valid_reason()
+ assert isinstance(sgpd_result, ps.Series)
+ gpd_result = gpd.GeoSeries(data).is_valid_reason()
+ for a, e in zip(sgpd_result.to_pandas(), gpd_result):
+ if a is None and e is None:
+ continue
+ if a == "Valid Geometry":
+ assert e == "Valid Geometry"
+ elif "Self-intersection" in a:
+ assert "Self-intersection" in e
+ else:
+ raise ValueError(f"Unexpected result: {a} not equivalent to {e}")
def test_is_empty(self):
for _, geom in self.geoms:
@@ -463,7 +486,30 @@ class TestMatchGeopandasSeries(TestBase):
pass
def test_make_valid(self):
- pass
+ import shapely
+
+ # 'structure' method requires shapely >= 2.1.0
+ if shapely.__version__ < "2.1.0":
+ return
+ for _, geom in self.geoms:
+ sgpd_result = GeoSeries(geom).make_valid(method="structure")
+ gpd_result = gpd.GeoSeries(geom).make_valid(method="structure")
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ for _, geom in self.geoms:
+ sgpd_result = GeoSeries(geom).make_valid(
+ method="structure", keep_collapsed=False
+ )
+ gpd_result = gpd.GeoSeries(geom).make_valid(
+ method="structure", keep_collapsed=False
+ )
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ # Ensure default method="linework" fails
+ with pytest.raises(ValueError):
+ GeoSeries([Point(0, 0)]).make_valid()
+ with pytest.raises(ValueError):
+ GeoSeries([Point(0, 0)]).make_valid(method="linework")
def test_reverse(self):
pass