This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 7a93715097 [GH-2047] Geopandas.GeoSeries: Implement to_crs (#2048)
7a93715097 is described below
commit 7a93715097572ebd043edf84e5a7d899eb8ec203
Author: Peter Nguyen <[email protected]>
AuthorDate: Thu Jul 3 21:44:07 2025 -0700
[GH-2047] Geopandas.GeoSeries: Implement to_crs (#2048)
---
python/sedona/geopandas/geoseries.py | 145 +++++++++++++++++----
python/tests/geopandas/test_geoseries.py | 16 ++-
.../tests/geopandas/test_match_geopandas_series.py | 9 +-
3 files changed, 145 insertions(+), 25 deletions(-)
diff --git a/python/sedona/geopandas/geoseries.py b/python/sedona/geopandas/geoseries.py
index 6f68c3c056..b3bbdb418a 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -282,8 +282,9 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
+ >>> from sedona.geopandas import GeoSeries
>>> from shapely.geometry import Point
- >>> s = geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+ >>> s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
>>> s
0 POINT (1 1)
1 POINT (2 2)
@@ -479,10 +480,10 @@ class GeoSeries(GeoFrame, pspd.Series):
pd_series = self._to_internal_pandas()
try:
return gpd.GeoSeries(
- pd_series.map(lambda wkb: shapely.wkb.loads(bytes(wkb)))
+ pd_series.map(lambda wkb: shapely.wkb.loads(bytes(wkb))),
crs=self.crs
)
- except Exception as e:
- return gpd.GeoSeries(pd_series)
+ except TypeError:
+ return gpd.GeoSeries(pd_series, crs=self.crs)
def to_spark_pandas(self) -> pspd.Series:
return pspd.Series(self._psdf._to_internal_pandas())
@@ -512,7 +513,6 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples:
>>> from shapely.geometry import Point
- >>> import geopandas as gpd
>>> from sedona.geopandas import GeoSeries
>>> gs = GeoSeries([Point(1, 1), Point(2, 2)])
@@ -542,7 +542,6 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
>>> from shapely.geometry import Polygon
- >>> import geopandas as gpd
>>> from sedona.geopandas import GeoSeries
>>> gs = GeoSeries([Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])])
@@ -615,7 +614,6 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
>>> from shapely.geometry import Polygon
- >>> import geopandas as gpd
>>> from sedona.geopandas import GeoSeries
>>> gs = GeoSeries([Point(0, 0), LineString([(0, 0), (1, 1)]),
Polygon([(0, 0), (1, 0), (1, 1)]), GeometryCollection([Point(0, 0),
LineString([(0, 0), (1, 1)]), Polygon([(0, 0), (1, 0), (1, 1)])])])
@@ -649,8 +647,9 @@ class GeoSeries(GeoFrame, pspd.Series):
An example with one invalid polygon (a bowtie geometry crossing itself)
and one missing geometry:
+ >>> from sedona.geopandas import GeoSeries
>>> from shapely.geometry import Polygon
- >>> s = geopandas.GeoSeries(
+ >>> s = GeoSeries(
... [
... Polygon([(0, 0), (1, 1), (0, 1)]),
... Polygon([(0,0), (1, 1), (1, 0), (0, 1)]), # bowtie geometry
@@ -697,16 +696,15 @@ class GeoSeries(GeoFrame, pspd.Series):
An example of a GeoDataFrame with one empty point, one point and one missing
value:
+ >>> from sedona.geopandas import GeoSeries
>>> from shapely.geometry import Point
- >>> d = {'geometry': [Point(), Point(2, 1), None]}
- >>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
- >>> gdf
- geometry
+ >>> geoseries = GeoSeries([Point(), Point(2, 1), None], crs="EPSG:4326")
+ >>> geoseries
0 POINT EMPTY
1 POINT (2 1)
2 None
- >>> gdf.is_empty
+ >>> geoseries.is_empty
0 True
1 False
2 False
@@ -743,8 +741,9 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
+ >>> from sedona.geopandas import GeoSeries
>>> from shapely.geometry import LineString
- >>> s = geopandas.GeoSeries(
+ >>> s = GeoSeries(
... [
... LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
... LineString([(0, 0), (1, 1), (1, -1)]),
@@ -793,8 +792,9 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
+ >>> from sedona.geopandas import GeoSeries
>>> from shapely.geometry import Point
- >>> s = geopandas.GeoSeries(
+ >>> s = GeoSeries(
... [
... Point(0, 1),
... Point(0, 1, 2),
@@ -967,8 +967,9 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
+ >>> from sedona.geopandas import GeoSeries
>>> from shapely.geometry import Polygon, LineString, Point
- >>> s = geopandas.GeoSeries(
+ >>> s = GeoSeries(
... [
... Polygon([(0, 0), (2, 2), (0, 2)]),
... LineString([(0, 0), (2, 2)]),
@@ -976,7 +977,7 @@ class GeoSeries(GeoFrame, pspd.Series):
... Point(0, 1),
... ],
... )
- >>> s2 = geopandas.GeoSeries(
+ >>> s2 = GeoSeries(
... [
... LineString([(1, 0), (1, 3)]),
... LineString([(2, 0), (0, 2)]),
@@ -1051,8 +1052,9 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
+ >>> from sedona.geopandas import GeoSeries
>>> from shapely.geometry import Polygon, LineString, Point
- >>> s = geopandas.GeoSeries(
+ >>> s = GeoSeries(
... [
... Polygon([(0, 0), (2, 2), (0, 2)]),
... Polygon([(0, 0), (2, 2), (0, 2)]),
@@ -1061,7 +1063,7 @@ class GeoSeries(GeoFrame, pspd.Series):
... Point(0, 1),
... ],
... )
- >>> s2 = geopandas.GeoSeries(
+ >>> s2 = GeoSeries(
... [
... Polygon([(0, 0), (1, 1), (0, 1)]),
... LineString([(1, 0), (1, 3)]),
@@ -1277,8 +1279,9 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
+ >>> from sedona.geopandas import GeoSeries
>>> from shapely.geometry import Point
- >>> s = geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+ >>> s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
>>> s.x
0 1.0
1 2.0
@@ -1305,8 +1308,9 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
+ >>> from sedona.geopandas import GeoSeries
>>> from shapely.geometry import Point
- >>> s = geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+ >>> s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
>>> s.y
0 1.0
1 2.0
@@ -1334,8 +1338,9 @@ class GeoSeries(GeoFrame, pspd.Series):
Examples
--------
+ >>> from sedona.geopandas import GeoSeries
>>> from shapely.geometry import Point
- >>> s = geopandas.GeoSeries([Point(1, 1, 1), Point(2, 2, 2), Point(3, 3, 3)])
+ >>> s = GeoSeries([Point(1, 1, 1), Point(2, 2, 2), Point(3, 3, 3)])
>>> s.z
0 1.0
1 2.0
@@ -1681,7 +1686,101 @@ class GeoSeries(GeoFrame, pspd.Series):
def to_crs(
self, crs: Union[Any, None] = None, epsg: Union[int, None] = None
) -> "GeoSeries":
- raise NotImplementedError("GeoSeries.to_crs() is not implemented yet.")
+ """Returns a ``GeoSeries`` with all geometries transformed to a new
+ coordinate reference system.
+
+ Transform all geometries in a GeoSeries to a different coordinate
+ reference system. The ``crs`` attribute on the current GeoSeries must
+ be set. Either ``crs`` or ``epsg`` may be specified for output.
+
+ This method will transform all points in all objects. It has no notion
+ of projecting entire geometries. All segments joining points are
+ assumed to be lines in the current projection, not geodesics. Objects
+ crossing the dateline (or other projection boundary) will have
+ undesirable behavior.
+
+ Parameters
+ ----------
+ crs : pyproj.CRS, optional if `epsg` is specified
+ The value can be anything accepted
+ by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
+ such as an authority string (eg "EPSG:4326") or a WKT string.
+ epsg : int, optional if `crs` is specified
+ EPSG code specifying output projection.
+
+ Returns
+ -------
+ GeoSeries
+
+ Examples
+ --------
+ >>> from shapely.geometry import Point
+ >>> from sedona.geopandas import GeoSeries
+ >>> geoseries = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)], crs=4326)
+ >>> geoseries.crs
+ <Geographic 2D CRS: EPSG:4326>
+ Name: WGS 84
+ Axis Info [ellipsoidal]:
+ - Lat[north]: Geodetic latitude (degree)
+ - Lon[east]: Geodetic longitude (degree)
+ Area of Use:
+ - name: World
+ - bounds: (-180.0, -90.0, 180.0, 90.0)
+ Datum: World Geodetic System 1984
+ - Ellipsoid: WGS 84
+ - Prime Meridian: Greenwich
+
+ >>> geoseries = geoseries.to_crs(3857)
+ >>> print(geoseries)
+ 0 POINT (111319.491 111325.143)
+ 1 POINT (222638.982 222684.209)
+ 2 POINT (333958.472 334111.171)
+ dtype: geometry
+ >>> geoseries.crs
+ <Projected CRS: EPSG:3857>
+ Name: WGS 84 / Pseudo-Mercator
+ Axis Info [cartesian]:
+ - X[east]: Easting (metre)
+ - Y[north]: Northing (metre)
+ Area of Use:
+ - name: World - 85°S to 85°N
+ - bounds: (-180.0, -85.06, 180.0, 85.06)
+ Coordinate Operation:
+ - name: Popular Visualisation Pseudo-Mercator
+ - method: Popular Visualisation Pseudo Mercator
+ Datum: World Geodetic System 1984
+ - Ellipsoid: WGS 84
+ - Prime Meridian: Greenwich
+
+ """
+
+ from pyproj import CRS
+
+ old_crs = self.crs
+ if old_crs is None:
+ raise ValueError(
+ "Cannot transform naive geometries. "
+ "Please set a crs on the object first."
+ )
+ assert isinstance(old_crs, CRS)
+
+ if crs is not None:
+ crs = CRS.from_user_input(crs)
+ elif epsg is not None:
+ crs = CRS.from_epsg(epsg)
+ else:
+ raise ValueError("Must pass either crs or epsg.")
+
+ # skip if the input CRS and output CRS are the exact same
+ if old_crs.is_exact_same(crs):
+ return self
+
+ col = self.get_first_geometry_column()
+ return self._query_geometry_column(
+ f"ST_Transform(`{col}`, 'EPSG:{old_crs.to_epsg()}', 'EPSG:{crs.to_epsg()}')",
+ col,
+ "",
+ )
def estimate_utm_crs(self, datum_name: str = "WGS 84"):
raise NotImplementedError(
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 78cb66812e..21b84ca7e5 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -189,7 +189,21 @@ class TestGeoSeries(TestBase):
pass
def test_to_crs(self):
- pass
+ from pyproj import CRS
+
+ geoseries = sgpd.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)], crs=4326)
+ assert isinstance(geoseries.crs, CRS) and geoseries.crs.to_epsg() == 4326
+ result = geoseries.to_crs(3857)
+ assert isinstance(result.crs, CRS) and result.crs.to_epsg() == 3857
+ expected = gpd.GeoSeries(
+ [
+ Point(111319.49079327356, 111325.14286638486),
+ Point(222638.98158654712, 222684.20850554455),
+ Point(333958.4723798207, 334111.1714019597),
+ ],
+ crs=3857,
+ )
+ self.check_sgpd_equals_gpd(result, expected)
def test_estimate_utm_crs(self):
pass
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index 00ad6687ee..1990b87074 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -303,7 +303,14 @@ class TestMatchGeopandasSeries(TestBase):
pass
def test_to_crs(self):
- pass
+ for _, geom in self.geoms:
+ sgpd_result = GeoSeries(geom, crs=4326)
+ gpd_result = gpd.GeoSeries(geom, crs=4326)
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+ sgpd_result = sgpd_result.to_crs(epsg=3857)
+ gpd_result = gpd_result.to_crs(epsg=3857)
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_estimate_utm_crs(self):
pass