This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 60436fead1 [GH-2025] Geopandas.GeoSeries: Implement is_valid, is_empty, is_simple (#2026)
60436fead1 is described below
commit 60436fead1e4686dd476ffebbc45a8efda6af633
Author: Peter Nguyen <[email protected]>
AuthorDate: Wed Jul 2 15:01:45 2025 -0700
[GH-2025] Geopandas.GeoSeries: Implement is_valid, is_empty, is_simple (#2026)
* Support elements of None type in GeoSeries
* Implement test_is_valid
* Implement is_empty
* Implement is_simple
---
python/sedona/geopandas/geoseries.py | 119 +++++++++++++++++++--
python/tests/geopandas/test_geoseries.py | 34 +++++-
.../tests/geopandas/test_match_geopandas_series.py | 24 ++++-
3 files changed, 160 insertions(+), 17 deletions(-)
diff --git a/python/sedona/geopandas/geoseries.py b/python/sedona/geopandas/geoseries.py
index cf17eb31a2..ea75b3da80 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -154,7 +154,9 @@ class GeoSeries(GeoFrame, pspd.Series):
fastpath=fastpath,
)
gs = gpd.GeoSeries(s)
- pdf = pd.Series(gs.apply(lambda geom: geom.wkb))
+ pdf = pd.Series(
+ gs.apply(lambda geom: geom.wkb if geom is not None else None)
+ )
# initialize the parent class pyspark Series with the pandas Series
super().__init__(
data=pdf,
@@ -591,18 +593,88 @@ class GeoSeries(GeoFrame, pspd.Series):
).to_spark_pandas()
@property
- def is_valid(self):
- # Implementation of the abstract method
- raise NotImplementedError("This method is not implemented yet.")
+ def is_valid(self) -> pspd.Series:
+ """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+ geometries that are valid.
+
+ Examples
+ --------
+
+ An example with one invalid polygon (a bowtie geometry crossing itself)
+ and one missing geometry:
+
+ >>> from shapely.geometry import Polygon
+ >>> s = geopandas.GeoSeries(
+ ... [
+ ... Polygon([(0, 0), (1, 1), (0, 1)]),
+ ... Polygon([(0,0), (1, 1), (1, 0), (0, 1)]), # bowtie geometry
+ ... Polygon([(0, 0), (2, 2), (2, 0)]),
+ ... None
+ ... ]
+ ... )
+ >>> s
+ 0 POLYGON ((0 0, 1 1, 0 1, 0 0))
+ 1 POLYGON ((0 0, 1 1, 1 0, 0 1, 0 0))
+ 2 POLYGON ((0 0, 2 2, 2 0, 0 0))
+ 3 None
+ dtype: geometry
+
+ >>> s.is_valid
+ 0 True
+ 1 False
+ 2 True
+ 3 False
+ dtype: bool
+
+ See also
+ --------
+ GeoSeries.is_valid_reason : reason for invalidity
+ """
+ return (
+ self._process_geometry_column("ST_IsValid", rename="is_valid")
+ .to_spark_pandas()
+ .astype("bool")
+ )
def is_valid_reason(self):
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")
@property
- def is_empty(self):
- # Implementation of the abstract method
- raise NotImplementedError("This method is not implemented yet.")
+ def is_empty(self) -> pspd.Series:
+ """
+ Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+ empty geometries.
+
+ Examples
+ --------
+ An example of a GeoDataFrame with one empty point, one point and one missing
+ value:
+
+ >>> from shapely.geometry import Point
+ >>> d = {'geometry': [Point(), Point(2, 1), None]}
+ >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
+ >>> gdf
+ geometry
+ 0 POINT EMPTY
+ 1 POINT (2 1)
+ 2 None
+
+ >>> gdf.is_empty
+ 0 True
+ 1 False
+ 2 False
+ dtype: bool
+
+ See Also
+ --------
+ GeoSeries.isna : detect missing values
+ """
+ return (
+ self._process_geometry_column("ST_IsEmpty", rename="is_empty")
+ .to_spark_pandas()
+ .astype("bool")
+ )
def count_coordinates(self):
# Implementation of the abstract method
@@ -617,9 +689,36 @@ class GeoSeries(GeoFrame, pspd.Series):
raise NotImplementedError("This method is not implemented yet.")
@property
- def is_simple(self):
- # Implementation of the abstract method
- raise NotImplementedError("This method is not implemented yet.")
+ def is_simple(self) -> pspd.Series:
+ """Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
+ geometries that do not cross themselves.
+
+ This is meaningful only for `LineStrings` and `LinearRings`.
+
+ Examples
+ --------
+ >>> from shapely.geometry import LineString
+ >>> s = geopandas.GeoSeries(
+ ... [
+ ... LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
+ ... LineString([(0, 0), (1, 1), (1, -1)]),
+ ... ]
+ ... )
+ >>> s
+ 0 LINESTRING (0 0, 1 1, 1 -1, 0 1)
+ 1 LINESTRING (0 0, 1 1, 1 -1)
+ dtype: geometry
+
+ >>> s.is_simple
+ 0 False
+ 1 True
+ dtype: bool
+ """
+ return (
+ self._process_geometry_column("ST_IsSimple", rename="is_simple")
+ .to_spark_pandas()
+ .astype("bool")
+ )
@property
def is_ring(self):
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 8c0805b5f8..99bc9aa654 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -23,7 +23,7 @@ import sedona.geopandas as sgpd
from sedona.geopandas import GeoSeries
from tests.test_base import TestBase
from shapely import wkt
-from shapely.geometry import Point, LineString, Polygon, GeometryCollection
+from shapely.geometry import Point, LineString, Polygon, GeometryCollection, LinearRing
from pandas.testing import assert_series_equal
@@ -192,13 +192,29 @@ class TestGeoSeries(TestBase):
assert_series_equal(result, expected)
def test_is_valid(self):
- pass
+ geoseries = sgpd.GeoSeries(
+ [
+ Polygon([(0, 0), (1, 1), (0, 1)]),
+ Polygon([(0, 0), (1, 1), (1, 0), (0, 1)]), # bowtie geometry
+ Polygon([(0, 0), (2, 2), (2, 0)]),
+ None,
+ ]
+ )
+ result = geoseries.is_valid
+ expected = pd.Series([True, False, True, False])
+ assert_series_equal(result.to_pandas(), expected)
def test_is_valid_reason(self):
pass
def test_is_empty(self):
- pass
+ geoseries = sgpd.GeoSeries(
+ [Point(), Point(2, 1), Polygon([(0, 0), (1, 1), (0, 1)]), None],
+ )
+
+ result = geoseries.is_empty
+ expected = pd.Series([True, False, False, False])
+ assert_series_equal(result.to_pandas(), expected)
def test_count_coordinates(self):
pass
@@ -210,7 +226,17 @@ class TestGeoSeries(TestBase):
pass
def test_is_simple(self):
- pass
+ s = sgpd.GeoSeries(
+ [
+ LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
+ LineString([(0, 0), (1, 1), (1, -1)]),
+ LinearRing([(0, 0), (1, 1), (1, -1), (0, 1)]),
+ LinearRing([(0, 0), (-1, 1), (-1, -1), (1, -1)]),
+ ]
+ )
+ result = s.is_simple
+ expected = pd.Series([False, True, False, True])
+ assert_series_equal(result.to_pandas(), expected)
def test_is_ring(self):
pass
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index ec89ba23bd..c4d6eb9da0 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -32,6 +32,7 @@ from shapely.geometry import (
LineString,
MultiPolygon,
GeometryCollection,
+ LinearRing,
)
from sedona.geopandas import GeoSeries
@@ -310,13 +311,21 @@ class TestMatchGeopandasSeries(TestBase):
self.check_pd_series_equal(sgpd_result, gpd_result)
def test_is_valid(self):
- pass
+ for _, geom in self.geoms:
+ sgpd_result = GeoSeries(geom).is_valid
+ assert isinstance(sgpd_result, ps.Series)
+ gpd_result = gpd.GeoSeries(geom).is_valid
+ self.check_pd_series_equal(sgpd_result, gpd_result)
def test_is_valid_reason(self):
pass
def test_is_empty(self):
- pass
+ for _, geom in self.geoms:
+ sgpd_result = GeoSeries(geom).is_empty
+ assert isinstance(sgpd_result, ps.Series)
+ gpd_result = gpd.GeoSeries(geom).is_empty
+ self.check_pd_series_equal(sgpd_result, gpd_result)
def test_count_coordinates(self):
pass
@@ -328,7 +337,16 @@ class TestMatchGeopandasSeries(TestBase):
pass
def test_is_simple(self):
- pass
+ data = [
+ LineString([(0, 0), (0, 0)]),
+ LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
+ LineString([(0, 0), (1, 1), (0, 0)]),
+ LinearRing([(0, 0), (1, 1), (1, 0), (0, 1), (0, 0)]),
+ LinearRing([(0, 0), (-1, 1), (-1, -1), (1, -1)]),
+ ]
+ sgpd_result = GeoSeries(data).is_simple
+ gpd_result = gpd.GeoSeries(data).is_simple
+ self.check_pd_series_equal(sgpd_result, gpd_result)
def test_is_ring(self):
pass