This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 9c07af8ca6 [GH-2112] Geopandas.GeoSeries: Implement `to_wkt` and
`to_wkb` (#2113)
9c07af8ca6 is described below
commit 9c07af8ca6015c07775771d2c65438466f972e01
Author: Peter Nguyen <[email protected]>
AuthorDate: Thu Jul 17 08:18:19 2025 -0700
[GH-2112] Geopandas.GeoSeries: Implement `to_wkt` and `to_wkb` (#2113)
* Implement to_wkt and to_wkb in geoseries
* Compare interpreted results rather than strings in to_wkt
* Use '...' instead of '>>>' for multiline
* Support inputting ps.Series into _create_from_select, so to_wkt can be
used with from_wkt
* Skip old shapely versions
---
python/sedona/geopandas/geoseries.py | 117 ++++++++++++++++++++-
python/tests/geopandas/test_geoseries.py | 73 ++++++++++++-
.../tests/geopandas/test_match_geopandas_series.py | 19 +++-
3 files changed, 198 insertions(+), 11 deletions(-)
diff --git a/python/sedona/geopandas/geoseries.py
b/python/sedona/geopandas/geoseries.py
index 821e1ab324..ee54f8704d 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -1960,12 +1960,13 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
+ >>> from sedona.geopandas import GeoSeries
>>> wkts = [
... 'POINT (1 1)',
... 'POINT (2 2)',
... 'POINT (3 3)',
... ]
- >>> s = geopandas.GeoSeries.from_wkt(wkts)
+ >>> s = GeoSeries.from_wkt(wkts)
>>> s
0 POINT (1 1)
1 POINT (2 2)
@@ -2093,7 +2094,15 @@ class GeoSeries(GeoFrame, pspd.Series):
select = f"ST_AsEWKB({select}) as geometry"
- spark_df = default_session().createDataFrame(data, schema=schema)
+ if isinstance(data, pspd.Series):
+ spark_df = data._internal.spark_frame
+ assert len(schema) == 1
+ spark_df = spark_df.withColumnRenamed(
+ _get_first_column_name(data), schema[0].name
+ )
+ else:
+ spark_df = default_session().createDataFrame(data, schema=schema)
+
spark_df = spark_df.selectExpr(select)
internal = InternalFrame(
@@ -2655,10 +2664,110 @@ class GeoSeries(GeoFrame, pspd.Series):
raise NotImplementedError("GeoSeries.to_json() is not implemented
yet.")
def to_wkb(self, hex: bool = False, **kwargs) -> pspd.Series:
- raise NotImplementedError("GeoSeries.to_wkb() is not implemented yet.")
+ """
+ Convert GeoSeries geometries to WKB
+
+ Parameters
+ ----------
+ hex : bool
+ If true, export the WKB as a hexadecimal string.
+ The default is to return a binary bytes object.
+ kwargs
+ Additional keyword args, as accepted by
+ :func:`shapely.to_wkb`. Not currently used by this implementation.
+
+ Returns
+ -------
+ Series
+ WKB representations of the geometries
+
+ See also
+ --------
+ GeoSeries.to_wkt
+
+ Examples
+ --------
+ >>> from shapely.geometry import Point, Polygon
+ >>> s = GeoSeries(
+ ... [
+ ... Point(0, 0),
+ ... Polygon(),
+ ... Polygon([(0, 0), (1, 1), (1, 0)]),
+ ... None,
+ ... ]
+ ... )
+
+ >>> s.to_wkb()
+ 0 b'\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00...
+ 1 b'\x01\x03\x00\x00\x00\x00\x00\x00\x00'
+ 2 b'\x01\x03\x00\x00\x00\x01\x00\x00\x00\x04\x00...
+ 3 None
+ dtype: object
+
+ >>> s.to_wkb(hex=True)
+ 0 010100000000000000000000000000000000000000
+ 1 010300000000000000
+ 2 0103000000010000000400000000000000000000000000...
+ 3 None
+ dtype: object
+
+ """
+ col = self.get_first_geometry_column()
+ select = f"ST_AsBinary(`{col}`)"
+
+ if hex:
+ # this is using pyspark's hex function since Sedona doesn't
support hex WKB conversion at the moment
+ # (it only supports hex EWKB)
+ select = f"hex({select})"
+
+ return self._query_geometry_column(
+ select,
+ cols=col,
+ rename="to_wkb",
+ returns_geom=False,
+ )
def to_wkt(self, **kwargs) -> pspd.Series:
- raise NotImplementedError("GeoSeries.to_wkt() is not implemented yet.")
+ """
+ Convert GeoSeries geometries to WKT
+
+ Note: Using shapely < 2.0.0 may return a different geometry type for empty
geometries.
+
+ Parameters
+ ----------
+ kwargs
+ Keyword args, as accepted by :func:`shapely.to_wkt`. Not currently used.
+
+ Returns
+ -------
+ Series
+ WKT representations of the geometries
+
+ Examples
+ --------
+ >>> from shapely.geometry import Point
+ >>> s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+ >>> s
+ 0 POINT (1 1)
+ 1 POINT (2 2)
+ 2 POINT (3 3)
+ dtype: geometry
+
+ >>> s.to_wkt()
+ 0 POINT (1 1)
+ 1 POINT (2 2)
+ 2 POINT (3 3)
+ dtype: object
+
+ See also
+ --------
+ GeoSeries.to_wkb
+ """
+ return self._process_geometry_column(
+ "ST_AsText",
+ rename="to_wkt",
+ returns_geom=False,
+ )
def to_arrow(self, geometry_encoding="WKB", interleaved=True,
include_z=None):
raise NotImplementedError("GeoSeries.to_arrow() is not implemented
yet.")
diff --git a/python/tests/geopandas/test_geoseries.py
b/python/tests/geopandas/test_geoseries.py
index dbbe53fdb4..d86226e1f8 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -36,6 +36,7 @@ from shapely.geometry import (
)
from pandas.testing import assert_series_equal
import pytest
+from packaging.version import parse as parse_version
class TestGeoSeries(TestGeopandasBase):
@@ -331,10 +332,73 @@ class TestGeoSeries(TestGeopandasBase):
pass
def test_to_wkb(self):
- pass
+ if parse_version(shapely.__version__) < parse_version("2.0.0"):
+ return
+
+ data = [
+ Point(0, 0),
+ Polygon(),
+ Polygon([(0, 0), (1, 1), (1, 0)]),
+ None,
+ ]
+ result = sgpd.GeoSeries(data).to_wkb()
+ expected = pd.Series(
+ [
+
b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ b"\x01\x03\x00\x00\x00\x00\x00\x00\x00",
+
b"\x01\x03\x00\x00\x00\x01\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ None,
+ ]
+ )
+
+ self.check_pd_series_equal(result, expected)
+
+ result = sgpd.GeoSeries(data).to_wkb(hex=True)
+ expected = pd.Series(
+ [
+ "010100000000000000000000000000000000000000",
+ "010300000000000000",
+
"0103000000010000000400000000000000000000000000000000000000000000000000F03F000000000000F03F000000000000F03F000000000000000000000000000000000000000000000000",
+ None,
+ ]
+ )
+ self.check_pd_series_equal(result, expected)
def test_to_wkt(self):
- pass
+ s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+ result = s.to_wkt()
+ expected = pd.Series(
+ [
+ "POINT (1 1)",
+ "POINT (2 2)",
+ "POINT (3 3)",
+ ]
+ )
+ self.check_pd_series_equal(result, expected)
+
+ s = GeoSeries(
+ [
+ Polygon(),
+ Point(1, 2),
+ LineString([(0, 0), (1, 1)]),
+ None,
+ ]
+ )
+ result = s.to_wkt()
+
+ # Old versions return empty GeometryCollection instead of empty Polygon
+ if parse_version(shapely.__version__) < parse_version("2.0.0"):
+ return
+
+ expected = pd.Series(
+ [
+ "POLYGON EMPTY",
+ "POINT (1 2)",
+ "LINESTRING (0 0, 1 1)",
+ None,
+ ]
+ )
+ self.check_pd_series_equal(result, expected)
def test_to_arrow(self):
pass
@@ -836,9 +900,8 @@ class TestGeoSeries(TestGeopandasBase):
# Ensure the index is preserved when crs is set (previously an issue)
expected_index = ps.Index(range(1, 6))
- ps.set_option("compute.ops_on_diff_frames", True)
- assert s2.index.equals(expected_index)
- ps.reset_option("compute.ops_on_diff_frames")
+ with self.ps_allow_diff_frames():
+ assert s2.index.equals(expected_index)
result = s.intersection(s2, align=True)
expected = gpd.GeoSeries(
diff --git a/python/tests/geopandas/test_match_geopandas_series.py
b/python/tests/geopandas/test_match_geopandas_series.py
index 6e38992f67..acd359bacb 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -376,10 +376,25 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
pass
def test_to_wkb(self):
- pass
+ for _, geom in self.geoms:
+ sgpd_result = GeoSeries(geom).to_wkb()
+ gpd_result = gpd.GeoSeries(geom).to_wkb()
+ self.check_pd_series_equal(sgpd_result, gpd_result)
+
+ sgpd_result = GeoSeries(geom).to_wkb(hex=True)
+ gpd_result = gpd.GeoSeries(geom).to_wkb(hex=True)
+ self.check_pd_series_equal(sgpd_result, gpd_result)
def test_to_wkt(self):
- pass
+ for _, geom in self.geoms:
+ ps_series = GeoSeries(geom).to_wkt()
+ pd_series = gpd.GeoSeries(geom).to_wkt()
+ # There are slight variations of valid WKT (e.g. some parentheses
being optional),
+ # so we check that they can be interpreted as the same geometry
rather than
+ # their strings being exactly equal.
+ sgpd_result = GeoSeries.from_wkt(ps_series)
+ gpd_result = gpd.GeoSeries.from_wkt(pd_series)
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_to_arrow(self):
pass