This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 9685593060 [GH-2273] Fix GeoPandas errors and issues in the examples (#2274)
9685593060 is described below
commit 968559306094328a23531fcce76dd08c2f89bec8
Author: Feng Zhang <[email protected]>
AuthorDate: Tue Aug 12 16:47:09 2025 -0700
[GH-2273] Fix GeoPandas errors and issues in the examples (#2274)
* [geopandas] Fix GeoPandas errors and issues in the examples
* [GH-2273] fix geodataframe
---
python/sedona/spark/geopandas/geodataframe.py | 25 +++++++++++++++----------
python/sedona/spark/geopandas/geoseries.py | 16 +++++++++++-----
2 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/python/sedona/spark/geopandas/geodataframe.py b/python/sedona/spark/geopandas/geodataframe.py
index 6e379dca69..0343e8a66f 100644
--- a/python/sedona/spark/geopandas/geodataframe.py
+++ b/python/sedona/spark/geopandas/geodataframe.py
@@ -956,8 +956,9 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
Examples
--------
>>> from shapely.geometry import Point
+ >>> from sedona.spark.geopandas import GeoDataFrame
>>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2), Point(2, 1)]}
- >>> gdf = geopandas.GeoDataFrame(d, crs=4326)
+ >>> gdf = GeoDataFrame(d, crs=4326)
>>> gdf
col1 geometry
0 name1 POINT (1 2)
@@ -1089,8 +1090,8 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
--------
>>> from sedona.spark.geopandas import GeoDataFrame
- >>> import geoarrow.pyarrow as ga
- >>> import pyarrow as pa
+ >>> import geoarrow.pyarrow as ga # requires: pip install geoarrow-pyarrow
+ >>> import pyarrow as pa # requires: pip install pyarrow
>>> table = pa.Table.from_arrays([
... ga.as_geoarrow([None, "POLYGON ((0 0, 1 1, 0 1, 0 0))", "LINESTRING (0 0, -1 1, 0 -1)"]),
... pa.array([1, 2, 3]),
@@ -1264,7 +1265,7 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
The returned data object needs to be consumed by a library implementing
the Arrow PyCapsule Protocol. For example, wrapping the data as a
pyarrow.Table (requires pyarrow >= 14.0):
- >>> import pyarrow as pa
+ >>> import pyarrow as pa # requires: pip install pyarrow
>>> table = pa.table(arrow_table)
>>> table
pyarrow.Table
@@ -1419,8 +1420,9 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
Examples
--------
- >>> import geodatasets
- >>> df = geopandas.read_file(geodatasets.get_path("nybb"))
+ >>> import geodatasets # requires: pip install geodatasets
+ >>> import geopandas as gpd
+ >>> df = gpd.read_file(geodatasets.get_path("nybb"))
>>> df.head() # doctest: +SKIP
BoroCode ... geometry
0 5 ... MULTIPOLYGON (((970217.022 145643.332, 970227....
@@ -1493,6 +1495,9 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
... 'value': [1, 2]
... })
>>> joined = points.sjoin(polygons)
+ >>> joined
+ geometry_left value_left geometry_right value_right
+ 0 POINT (0.5 0.5) 1 POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)) 1
"""
from sedona.spark.geopandas.tools.sjoin import sjoin as sjoin_tool
@@ -1607,17 +1612,17 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
... "geometry": [Point(0, 0), LineString([(0, 0), (1, 1)])],
... "int": [1, 2]
... })
- >>> gdf.to_file(filepath, driver="geoparquet")
+ >>> gdf.to_file("output.parquet", driver="geoparquet")
With selected drivers you can also append to a file with ``mode="a"``:
- >>> gdf.to_file(filepath, driver="geojson", mode="a")
+ >>> gdf.to_file("output.geojson", driver="geojson", mode="a")
When the index is of non-integer dtype, ``index=None`` (default) is treated as True,
writing the index to the file.
- >>> gdf = GeoDataFrame({"geometry": [Point(0, 0)]}, index=["a", "b"])
- >>> gdf.to_file(filepath, driver="geoparquet")
+ >>> gdf = GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1)]}, index=["a", "b"])
+ >>> gdf.to_file("output_with_index.parquet", driver="geoparquet")
"""
sgpd.io._to_file(self, path, driver, index, **kwargs)
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index 7580cf3793..77e6d349c4 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -319,9 +319,11 @@ class GeoSeries(GeoFrame, pspd.Series):
- copy: Whether to copy the input data.
- fastpath: Internal parameter for fast initialization.
- Examples:
+ Examples
+ --------
>>> from shapely.geometry import Point
>>> import geopandas as gpd
+ >>> import pandas as pd
>>> from sedona.spark.geopandas import GeoSeries
# Example 1: Initialize with GeoDataFrame
@@ -443,6 +445,9 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
+ >>> from shapely.geometry import Point
+ >>> from sedona.spark.geopandas import GeoSeries
+ >>> s = GeoSeries([Point(1, 1), Point(2, 2)], crs='EPSG:4326')
>>> s.crs # doctest: +SKIP
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
@@ -1756,7 +1761,7 @@ class GeoSeries(GeoFrame, pspd.Series):
... b"\x00\x08@\x00\x00\x00\x00\x00\x00\x08@"
... ),
... ]
- >>> s = geopandas.GeoSeries.from_wkb(wkbs)
+ >>> s = GeoSeries.from_wkb(wkbs)
>>> s
0 POINT (1 1)
1 POINT (2 2)
@@ -1899,7 +1904,7 @@ class GeoSeries(GeoFrame, pspd.Series):
>>> x = [2.5, 5, -3.0]
>>> y = [0.5, 1, 1.5]
- >>> s = geopandas.GeoSeries.from_xy(x, y, crs="EPSG:4326")
+ >>> s = GeoSeries.from_xy(x, y, crs="EPSG:4326")
>>> s
0 POINT (2.5 0.5)
1 POINT (5 1)
@@ -2442,8 +2447,9 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
- >>> import geodatasets
- >>> df = geopandas.read_file(
+ >>> import geodatasets # requires: pip install geodatasets
+ >>> import geopandas as gpd
+ >>> df = gpd.read_file(
... geodatasets.get_path("geoda.chicago_commpop")
... )
>>> df.geometry.values.estimate_utm_crs() # doctest: +SKIP