This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 475c1687f9 [GH-2104] Geopandas: Implement `to_json`, `to_arrow`,
`from_arrow` (#2105)
475c1687f9 is described below
commit 475c1687f997719c3fe787046f9063a7fcebc3d7
Author: Peter Nguyen <[email protected]>
AuthorDate: Tue Jul 22 23:52:17 2025 -0700
[GH-2104] Geopandas: Implement `to_json`, `to_arrow`, `from_arrow` (#2105)
* Implement to_json and to_arrow
* Implement them for GeoSeries too
* Fix StopIteration error to_geopandas() in test_boundary
* Skip to_arrow for geopandas versions < 1.0.0, and add note in doc
* fix skip tests
* Skip using get_srid for shapely version < 2.0.0
* Add from_arrow to both series and dataframe
* Remove geoarrow dependency
* Update python/sedona/geopandas/geoseries.py
Co-authored-by: Copilot <[email protected]>
* Remove debug comments
Co-authored-by: Copilot <[email protected]>
* Remove geometry collection skip in from_arrow
* Fix tests after merge by selecting as EWKB in sjoin.py
* Refactor to use self.name instead of first_geom column
* Add a complex df test
* Refactor series.py to use spark df api instead of spark sql
* Fix after merge
* Clean up
* Remove tuple type annotation
* More cleanup
* Fix align in fillna
* Add missing default geometry column logic to fix to_json
* Fix tests after merge
* Empty commit to retrigger ci
* Remove null first geoms to avoid spark bug
* Add back _to_geoframe
* Cast to object in series and fix test
* empty commit to retrigger ci
---------
Co-authored-by: Copilot <[email protected]>
---
python/sedona/geopandas/geodataframe.py | 380 ++++++++++++++++++++-
python/sedona/geopandas/geoseries.py | 191 ++++++++++-
python/sedona/geopandas/tools/sjoin.py | 3 +-
python/tests/geopandas/test_geodataframe.py | 114 ++++++-
python/tests/geopandas/test_geopandas_base.py | 31 +-
python/tests/geopandas/test_geoseries.py | 63 +++-
.../geopandas/test_match_geopandas_dataframe.py | 117 +++++++
.../tests/geopandas/test_match_geopandas_series.py | 25 +-
8 files changed, 887 insertions(+), 37 deletions(-)
diff --git a/python/sedona/geopandas/geodataframe.py
b/python/sedona/geopandas/geodataframe.py
index 60282a7c7f..6edf0bb741 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -19,6 +19,8 @@ from __future__ import annotations
from typing import Any, Literal, Callable, Union
import typing
+import os
+import shapely
import warnings
import numpy as np
import shapely
@@ -412,6 +414,9 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
if isinstance(data, GeoDataFrame):
if data._safe_get_crs() is None:
data.crs = crs
+
+ super().__init__(data, index=index, columns=columns, dtype=dtype,
copy=copy)
+
elif isinstance(data, GeoSeries):
if data.crs is None:
data.crs = crs
@@ -469,11 +474,41 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
copy=copy,
)
+ # Set geometry column name
if isinstance(data, (GeoDataFrame, gpd.GeoDataFrame)):
self._geometry_column_name = data._geometry_column_name
if crs is not None and data.crs != crs:
raise ValueError(crs_mismatch_error)
+ if geometry is None and "geometry" in self.columns:
+
+ if (self.columns == "geometry").sum() > 1:
+ raise ValueError(
+ "GeoDataFrame does not support multiple columns "
+ "using the geometry column name 'geometry'."
+ )
+
+ geometry: pspd.Series = self["geometry"]
+ if isinstance(geometry, sgpd.GeoSeries):
+ geom_crs = geometry.crs
+ if geom_crs is None:
+ if crs is not None:
+ geometry.set_crs(crs, inplace=True)
+ self.set_geometry(geometry, inplace=True)
+ else:
+ if crs is not None and geom_crs != crs:
+ raise ValueError(crs_mismatch_error)
+
+ # No need to call set_geometry() here since it's already part of
the df, just set the name
+ self._geometry_column_name = "geometry"
+
+ if geometry is None and crs:
+ raise ValueError(
+ "Assigning CRS to a GeoDataFrame without a geometry column is
not "
+ "supported. Supply geometry using the 'geometry=' keyword
argument, "
+ "or by providing a DataFrame with column name 'geometry'",
+ )
+
#
============================================================================
# GEOMETRY COLUMN MANAGEMENT
#
============================================================================
@@ -842,23 +877,33 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
sdf = self._internal.spark_frame.selectExpr(*select_expressions)
return GeoDataFrame(sdf)
- def to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
- # Implementation of the abstract method
- raise NotImplementedError(
- _not_implemented_error(
- "to_geopandas",
- "Converts to GeoPandas GeoDataFrame by collecting all data to
driver.",
- )
- )
+ def to_geopandas(self) -> gpd.GeoDataFrame:
+ """
+ Note: Unlike in pandas and geopandas, Sedona will always return a
general Index.
+ This differs from pandas and geopandas, which will return a RangeIndex
by default.
- def _to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
- # Implementation of the abstract method
- raise NotImplementedError(
- _not_implemented_error(
- "_to_geopandas",
- "Internal method for GeoPandas conversion without logging
warnings.",
- )
+ e.g pd.Index([0, 1, 2]) instead of pd.RangeIndex(start=0, stop=3,
step=1)
+ """
+ from pyspark.pandas.utils import log_advice
+
+ log_advice(
+ "`to_geopandas` loads all data into the driver's memory. "
+ "It should only be used if the resulting geopandas GeoSeries is
expected to be small."
)
+ return self._to_geopandas()
+
+ def _to_geopandas(self) -> gpd.GeoDataFrame:
+ pd_df = self._internal.to_pandas_frame
+
+ for col_name in pd_df.columns:
+ series: pspd.Series = self[col_name]
+ if isinstance(series, sgpd.GeoSeries):
+ # Use _to_geopandas instead of to_geopandas to avoid logging
extra warnings
+ pd_df[col_name] = series._to_geopandas()
+ else:
+ pd_df[col_name] = series.to_pandas()
+
+ return gpd.GeoDataFrame(pd_df, geometry=self._geometry_column_name)
@property
def sindex(self) -> SpatialIndex | None:
@@ -952,8 +997,311 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
return
self.geometry.crs = value
+ @classmethod
+ def from_dict(
+ cls,
+ data: dict,
+ geometry=None,
+ crs: Any | None = None,
+ **kwargs,
+ ) -> GeoDataFrame:
+ raise NotImplementedError("from_dict() is not implemented yet.")
+
+ @classmethod
+ def from_file(cls, filename: os.PathLike | typing.IO, **kwargs) ->
GeoDataFrame:
+ raise NotImplementedError("from_file() is not implemented yet.")
+
+ @classmethod
+ def from_features(
+ cls, features, crs: Any | None = None, columns: Iterable[str] | None =
None
+ ) -> GeoDataFrame:
+ raise NotImplementedError("from_features() is not implemented yet.")
+
+ @classmethod
+ def from_postgis(
+ cls,
+ sql: str | sqlalchemy.text,
+ con,
+ geom_col: str = "geom",
+ crs: Any | None = None,
+ index_col: str | list[str] | None = None,
+ coerce_float: bool = True,
+ parse_dates: list | dict | None = None,
+ params: list | tuple | dict | None = None,
+ chunksize: int | None = None,
+ ) -> GeoDataFrame:
+ raise NotImplementedError("from_postgis() is not implemented yet.")
+
+ @classmethod
+ def from_arrow(
+ cls, table, geometry: str | None = None, to_pandas_kwargs: dict | None
= None
+ ):
+ """
+ Construct a GeoDataFrame from a Arrow table object based on GeoArrow
+ extension types.
+
+ See https://geoarrow.org/ for details on the GeoArrow specification.
+
+ This functions accepts any tabular Arrow object implementing
+ the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
+ or ``__arrow_c_stream__`` method).
+
+ .. _Arrow PyCapsule Protocol:
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+ .. versionadded:: 1.0
+
+ Parameters
+ ----------
+ table : pyarrow.Table or Arrow-compatible table
+ Any tabular object implementing the Arrow PyCapsule Protocol
+ (i.e. has an ``__arrow_c_array__`` or ``__arrow_c_stream__``
+ method). This table should have at least one column with a
+ geoarrow geometry type.
+ geometry : str, default None
+ The name of the geometry column to set as the active geometry
+ column. If None, the first geometry column found will be used.
+ to_pandas_kwargs : dict, optional
+ Arguments passed to the `pa.Table.to_pandas` method for
non-geometry
+ columns. This can be used to control the behavior of the
conversion of the
+ non-geometry columns to a pandas DataFrame. For example, you can
use this
+ to control the dtype conversion of the columns. By default, the
`to_pandas`
+ method is called with no additional arguments.
+
+ Returns
+ -------
+ GeoDataFrame
+
+ See Also
+ --------
+ GeoDataFrame.to_arrow
+ GeoSeries.from_arrow
+
+ Examples
+ --------
+
+ >>> from sedona.geopandas import GeoDataFrame
+ >>> import geoarrow.pyarrow as ga
+ >>> import pyarrow as pa
+ >>> table = pa.Table.from_arrays([
+ ... ga.as_geoarrow([None, "POLYGON ((0 0, 1 1, 0 1, 0 0))",
"LINESTRING (0 0, -1 1, 0 -1)"]),
+ ... pa.array([1, 2, 3]),
+ ... pa.array(["a", "b", "c"]),
+ ... ], names=["geometry", "id", "value"])
+ >>> gdf = GeoDataFrame.from_arrow(table)
+ >>> gdf
+ geometry id value
+ 0 None 1 a
+ 1 POLYGON ((0 0, 1 1, 0 1, 0 0)) 2 b
+ 2 LINESTRING (0 0, -1 1, 0 -1) 3 c
+ """
+ if to_pandas_kwargs is None:
+ to_pandas_kwargs = {}
+
+ gpd_df = gpd.GeoDataFrame.from_arrow(
+ table, geometry=geometry, **to_pandas_kwargs
+ )
+ return GeoDataFrame(gpd_df)
+
+ def to_json(
+ self,
+ na: Literal["null", "drop", "keep"] = "null",
+ show_bbox: bool = False,
+ drop_id: bool = False,
+ to_wgs84: bool = False,
+ **kwargs,
+ ) -> str:
+ """
+ Returns a GeoJSON representation of the ``GeoDataFrame`` as a string.
+ Parameters
+ ----------
+ na : {'null', 'drop', 'keep'}, default 'null'
+ Indicates how to output missing (NaN) values in the GeoDataFrame.
+ See below.
+ show_bbox : bool, optional, default: False
+ Include bbox (bounds) in the geojson
+ drop_id : bool, default: False
+ Whether to retain the index of the GeoDataFrame as the id property
+ in the generated GeoJSON. Default is False, but may want True
+ if the index is just arbitrary row numbers.
+ to_wgs84: bool, optional, default: False
+ If the CRS is set on the active geometry column it is exported as
+ WGS84 (EPSG:4326) to meet the `2016 GeoJSON specification
+ <https://tools.ietf.org/html/rfc7946>`_.
+ Set to True to force re-projection and set to False to ignore CRS.
False by
+ default.
+ Notes
+ -----
+ The remaining *kwargs* are passed to json.dumps().
+ Missing (NaN) values in the GeoDataFrame can be represented as follows:
+ - ``null``: output the missing entries as JSON null.
+ - ``drop``: remove the property from the feature. This applies to each
+ feature individually so that features may have different properties.
+ - ``keep``: output the missing entries as NaN.
+ If the GeoDataFrame has a defined CRS, its definition will be included
+ in the output unless it is equal to WGS84 (default GeoJSON CRS) or not
+ possible to represent in the URN OGC format, or unless
``to_wgs84=True``
+ is specified.
+ Examples
+ --------
+ >>> from sedona.geopandas import GeoDataFrame
+ >>> from shapely.geometry import Point
+ >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2),
Point(2, 1)]}
+ >>> gdf = GeoDataFrame(d, crs="EPSG:3857")
+ >>> gdf
+ col1 geometry
+ 0 name1 POINT (1 2)
+ 1 name2 POINT (2 1)
+ >>> gdf.to_json()
+ '{"type": "FeatureCollection", "features": [{"id": "0", "type":
"Feature", \
+"properties": {"col1": "name1"}, "geometry": {"type": "Point", "coordinates":
[1.0,\
+ 2.0]}}, {"id": "1", "type": "Feature", "properties": {"col1": "name2"},
"geometry"\
+: {"type": "Point", "coordinates": [2.0, 1.0]}}], "crs": {"type": "name",
"properti\
+es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
+ Alternatively, you can write GeoJSON to file:
+ >>> gdf.to_file(path, driver="GeoJSON") # doctest: +SKIP
+ See also
+ --------
+ GeoDataFrame.to_file : write GeoDataFrame to file
+ """
+ # Because this function returns the geojson string in memory,
+ # we simply rely on geopandas's implementation.
+ # Additionally, spark doesn't seem to have a straight forward way to
get the string
+ # without writing to a file first by using
sdf.write.format("geojson").save(path, **kwargs)
+ # return self.to_geopandas().to_json(na, show_bbox, drop_id, to_wgs84,
**kwargs)
+ # ST_AsGeoJSON() works only for one column
+ result = self.to_geopandas()
+ return result.to_json(na, show_bbox, drop_id, to_wgs84, **kwargs)
+
+ @property
+ def __geo_interface__(self) -> dict:
+ raise NotImplementedError("__geo_interface__() is not implemented
yet.")
+
+ def iterfeatures(
+ self, na: str = "null", show_bbox: bool = False, drop_id: bool = False
+ ) -> typing.Generator[dict]:
+ raise NotImplementedError("iterfeatures() is not implemented yet.")
+
+ def to_geo_dict(
+ self, na: str | None = "null", show_bbox: bool = False, drop_id: bool
= False
+ ) -> dict:
+ raise NotImplementedError("to_geo_dict() is not implemented yet.")
+
+ def to_wkb(self, hex: bool = False, **kwargs) -> pd.DataFrame:
+ raise NotImplementedError("to_wkb() is not implemented yet.")
+
+ def to_wkt(self, **kwargs) -> pd.DataFrame:
+ raise NotImplementedError("to_wkt() is not implemented yet.")
+
+ def to_arrow(
+ self,
+ *,
+ index: bool | None = None,
+ geometry_encoding="WKB",
+ interleaved: bool = True,
+ include_z: bool | None = None,
+ ):
+ """Encode a GeoDataFrame to GeoArrow format.
+ See https://geoarrow.org/ for details on the GeoArrow specification.
+ This function returns a generic Arrow data object implementing
+ the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_stream__``
+ method). This object can then be consumed by your Arrow implementation
+ of choice that supports this protocol.
+ .. _Arrow PyCapsule Protocol:
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+ Note: Requires geopandas versions >= 1.0.0 to use with Sedona.
+
+ Parameters
+ ----------
+ index : bool, default None
+ If ``True``, always include the dataframe's index(es) as columns
+ in the file output.
+ If ``False``, the index(es) will not be written to the file.
+ If ``None``, the index(ex) will be included as columns in the file
+ output except `RangeIndex` which is stored as metadata only.
+
+ Note: Unlike in geopandas, ``None`` will include the index in the
column because Sedona always
+ converts `RangeIndex` into a general `Index`.
+
+ geometry_encoding : {'WKB', 'geoarrow' }, default 'WKB'
+ The GeoArrow encoding to use for the data conversion.
+ interleaved : bool, default True
+ Only relevant for 'geoarrow' encoding. If True, the geometries'
+ coordinates are interleaved in a single fixed size list array.
+ If False, the coordinates are stored as separate arrays in a
+ struct type.
+ include_z : bool, default None
+ Only relevant for 'geoarrow' encoding (for WKB, the dimensionality
+ of the individual geometries is preserved).
+ If False, return 2D geometries. If True, include the third
dimension
+ in the output (if a geometry has no third dimension, the
z-coordinates
+ will be NaN). By default, will infer the dimensionality from the
+ input geometries. Note that this inference can be unreliable with
+ empty geometries (for a guaranteed result, it is recommended to
+ specify the keyword).
+ Returns
+ -------
+ ArrowTable
+ A generic Arrow table object with geometry columns encoded to
+ GeoArrow.
+ Examples
+ --------
+ >>> from sedona.geopandas import GeoDataFrame
+ >>> from shapely.geometry import Point
+ >>> data = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2),
Point(2, 1)]}
+ >>> gdf = GeoDataFrame(data)
+ >>> gdf
+ col1 geometry
+ 0 name1 POINT (1 2)
+ 1 name2 POINT (2 1)
+ >>> arrow_table = gdf.to_arrow(index=False)
+ >>> arrow_table
+ <geopandas.io._geoarrow.ArrowTable object at ...>
+ The returned data object needs to be consumed by a library implementing
+ the Arrow PyCapsule Protocol. For example, wrapping the data as a
+ pyarrow.Table (requires pyarrow >= 14.0):
+ >>> import pyarrow as pa
+ >>> table = pa.table(arrow_table)
+ >>> table
+ pyarrow.Table
+ col1: string
+ geometry: binary
+ ----
+ col1: [["name1","name2"]]
+ geometry: [[0101000000000000000000F03F0000000000000040,\
+01010000000000000000000040000000000000F03F]]
+ """
+ # Because this function returns the arrow table in memory, we simply
rely on geopandas's implementation.
+ # This also returns a geopandas specific data type, which can be
converted to an actual pyarrow table,
+ # so there is no direct Sedona equivalent. This way we also get all of
the arguments implemented for free.
+ return self.to_geopandas().to_arrow(
+ index=index,
+ geometry_encoding=geometry_encoding,
+ interleaved=interleaved,
+ include_z=include_z,
+ )
+
+ def to_feather(
+ self,
+ path,
+ index: bool | None = None,
+ compression: str | None = None,
+ schema_version=None,
+ **kwargs,
+ ):
+ raise NotImplementedError("to_feather() is not implemented yet.")
+
+ def to_file(
+ self,
+ filename: str,
+ driver: str | None = None,
+ schema: dict | None = None,
+ index: bool | None = None,
+ **kwargs,
+ ):
+ raise NotImplementedError("to_file() is not implemented yet.")
+
@property
- def geom_type(self):
+ def geom_type(self) -> str:
# Implementation of the abstract method
raise NotImplementedError(
_not_implemented_error(
diff --git a/python/sedona/geopandas/geoseries.py
b/python/sedona/geopandas/geoseries.py
index 434c0ba315..5e50d3787c 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -46,6 +46,7 @@ from sedona.geopandas._typing import Label
from sedona.geopandas.base import GeoFrame
from sedona.geopandas.geodataframe import GeoDataFrame
from sedona.geopandas.sindex import SpatialIndex
+from packaging.version import parse as parse_version
from pyspark.pandas.internal import (
SPARK_DEFAULT_INDEX_NAME, # __index_level_0__
@@ -439,6 +440,8 @@ class GeoSeries(GeoFrame, pspd.Series):
fastpath=fastpath,
)
+ pd_series = pd_series.astype(object)
+
# initialize the parent class pyspark Series with the pandas Series
super().__init__(data=pd_series)
@@ -624,6 +627,10 @@ class GeoSeries(GeoFrame, pspd.Series):
curr_crs = self.crs
+ # If CRS is the same, do nothing
+ if curr_crs == crs:
+ return
+
if not allow_override and curr_crs is not None and not curr_crs == crs:
raise ValueError(
"The GeoSeries already has a CRS which is not equal to the
passed "
@@ -3621,7 +3628,55 @@ class GeoSeries(GeoFrame, pspd.Series):
@classmethod
def from_arrow(cls, arr, **kwargs) -> "GeoSeries":
- raise NotImplementedError("GeoSeries.from_arrow() is not implemented
yet.")
+ """
+ Construct a GeoSeries from a Arrow array object with a GeoArrow
+ extension type.
+
+ See https://geoarrow.org/ for details on the GeoArrow specification.
+
+ This functions accepts any Arrow array object implementing
+ the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
+ method).
+
+ .. _Arrow PyCapsule Protocol:
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+ Note: Requires geopandas versions >= 1.0.0 to use with Sedona.
+
+ Parameters
+ ----------
+ arr : pyarrow.Array, Arrow array
+ Any array object implementing the Arrow PyCapsule Protocol
+ (i.e. has an ``__arrow_c_array__`` or ``__arrow_c_stream__``
+ method). The type of the array should be one of the
+ geoarrow geometry types.
+ **kwargs
+ Other parameters passed to the GeoSeries constructor.
+
+ Returns
+ -------
+ GeoSeries
+
+ See Also
+ --------
+ GeoSeries.to_arrow
+ GeoDataFrame.from_arrow
+
+ Examples
+ --------
+
+ >>> from sedona.geopandas import GeoSeries
+ >>> import geoarrow.pyarrow as ga
+ >>> array = ga.as_geoarrow([None, "POLYGON ((0 0, 1 1, 0 1, 0 0))",
"LINESTRING (0 0, -1 1, 0 -1)"])
+ >>> geoseries = GeoSeries.from_arrow(array)
+ >>> geoseries
+ 0 None
+ 1 POLYGON ((0 0, 1 1, 0 1, 0 0))
+ 2 LINESTRING (0 0, -1 1, 0 -1)
+ dtype: geometry
+
+ """
+ gpd_series = gpd.GeoSeries.from_arrow(arr, **kwargs)
+ return GeoSeries(gpd_series)
@classmethod
def _create_from_select(
@@ -4210,7 +4265,56 @@ class GeoSeries(GeoFrame, pspd.Series):
to_wgs84: bool = False,
**kwargs,
) -> str:
- raise NotImplementedError("GeoSeries.to_json() is not implemented
yet.")
+ """
+ Returns a GeoJSON string representation of the GeoSeries.
+
+ Parameters
+ ----------
+ show_bbox : bool, optional, default: True
+ Include bbox (bounds) in the geojson
+ drop_id : bool, default: False
+ Whether to retain the index of the GeoSeries as the id property
+ in the generated GeoJSON. Default is False, but may want True
+ if the index is just arbitrary row numbers.
+ to_wgs84: bool, optional, default: False
+ If the CRS is set on the active geometry column it is exported as
+ WGS84 (EPSG:4326) to meet the `2016 GeoJSON specification
+ <https://tools.ietf.org/html/rfc7946>`_.
+ Set to True to force re-projection and set to False to ignore CRS.
False by
+ default.
+
+ *kwargs* that will be passed to json.dumps().
+
+ Returns
+ -------
+ JSON string
+
+ Examples
+ --------
+ >>> from sedona.geopandas import GeoSeries
+ >>> from shapely.geometry import Point
+ >>> s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+ >>> s
+ 0 POINT (1 1)
+ 1 POINT (2 2)
+ 2 POINT (3 3)
+ dtype: geometry
+
+ >>> s.to_json()
+ '{"type": "FeatureCollection", "features": [{"id": "0", "type":
"Feature", "pr\
+operties": {}, "geometry": {"type": "Point", "coordinates": [1.0, 1.0]},
"bbox": [1.0,\
+ 1.0, 1.0, 1.0]}, {"id": "1", "type": "Feature", "properties": {}, "geometry":
{"type"\
+: "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id":
"2", "typ\
+e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates":
[3.0, 3.\
+0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+
+ See Also
+ --------
+ GeoSeries.to_file : write GeoSeries to file
+ """
+ return self._to_geoframe(name="geometry").to_json(
+ na="null", show_bbox=show_bbox, drop_id=drop_id,
to_wgs84=to_wgs84, **kwargs
+ )
def to_wkb(self, hex: bool = False, **kwargs) -> pspd.Series:
"""
@@ -4313,7 +4417,79 @@ class GeoSeries(GeoFrame, pspd.Series):
)
def to_arrow(self, geometry_encoding="WKB", interleaved=True,
include_z=None):
- raise NotImplementedError("GeoSeries.to_arrow() is not implemented
yet.")
+ """Encode a GeoSeries to GeoArrow format.
+
+ See https://geoarrow.org/ for details on the GeoArrow specification.
+
+ This functions returns a generic Arrow array object implementing
+ the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
+ method). This object can then be consumed by your Arrow implementation
+ of choice that supports this protocol.
+
+ .. _Arrow PyCapsule Protocol:
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+ Note: Requires geopandas versions >= 1.0.0 to use with Sedona.
+
+ Parameters
+ ----------
+ geometry_encoding : {'WKB', 'geoarrow' }, default 'WKB'
+ The GeoArrow encoding to use for the data conversion.
+ interleaved : bool, default True
+ Only relevant for 'geoarrow' encoding. If True, the geometries'
+ coordinates are interleaved in a single fixed size list array.
+ If False, the coordinates are stored as separate arrays in a
+ struct type.
+ include_z : bool, default None
+ Only relevant for 'geoarrow' encoding (for WKB, the dimensionality
+ of the individual geometries is preserved).
+ If False, return 2D geometries. If True, include the third
dimension
+ in the output (if a geometry has no third dimension, the
z-coordinates
+ will be NaN). By default, will infer the dimensionality from the
+ input geometries. Note that this inference can be unreliable with
+ empty geometries (for a guaranteed result, it is recommended to
+ specify the keyword).
+
+ Returns
+ -------
+ GeoArrowArray
+ A generic Arrow array object with geometry data encoded to
GeoArrow.
+
+ Examples
+ --------
+ >>> from sedona.geopandas import GeoSeries
+ >>> from shapely.geometry import Point
+ >>> gser = GeoSeries([Point(1, 2), Point(2, 1)])
+ >>> gser
+ 0 POINT (1 2)
+ 1 POINT (2 1)
+ dtype: geometry
+
+ >>> arrow_array = gser.to_arrow()
+ >>> arrow_array
+ <geopandas.io._geoarrow.GeoArrowArray object at ...>
+
+ The returned array object needs to be consumed by a library
implementing
+ the Arrow PyCapsule Protocol. For example, wrapping the data as a
+ pyarrow.Array (requires pyarrow >= 14.0):
+
+ >>> import pyarrow as pa
+ >>> array = pa.array(arrow_array)
+ >>> array
+ <pyarrow.lib.BinaryArray object at ...>
+ [
+ 0101000000000000000000F03F0000000000000040,
+ 01010000000000000000000040000000000000F03F
+ ]
+
+ """
+ # Because this function returns the arrow array in memory, we simply
rely on geopandas's implementation.
+ # This also returns a geopandas specific data type, which can be
converted to an actual pyarrow array,
+ # so there is no direct Sedona equivalent. This way we also get all of
the arguments implemented for free.
+ return self.to_geopandas().to_arrow(
+ geometry_encoding=geometry_encoding,
+ interleaved=interleaved,
+ include_z=include_z,
+ )
def clip(self, mask, keep_geom_type: bool = False, sort=False) ->
"GeoSeries":
raise NotImplementedError(
@@ -4349,6 +4525,15 @@ class GeoSeries(GeoFrame, pspd.Series):
else:
return value, False
+ def _to_geoframe(self, name=None):
+ if name is not None:
+ renamed = self.rename(name)
+ elif self._column_label is None:
+ renamed = self.rename("geometry")
+ else:
+ renamed = self
+ return GeoDataFrame(pspd.DataFrame(renamed._internal))
+
# -----------------------------------------------------------------------------
# # Utils
diff --git a/python/sedona/geopandas/tools/sjoin.py
b/python/sedona/geopandas/tools/sjoin.py
index e0dcd8921c..a62529475e 100644
--- a/python/sedona/geopandas/tools/sjoin.py
+++ b/python/sedona/geopandas/tools/sjoin.py
@@ -166,7 +166,8 @@ def _frame_join(
final_columns = []
# Add geometry column (always from left for geopandas compatibility)
- final_columns.append("l_geometry as geometry")
+ # Currently, Sedona stores geometries in EWKB format
+ final_columns.append("ST_AsEWKB(l_geometry) as geometry")
# Add other columns with suffix handling
left_data_cols = [col for col in left_geo_df.columns if col !=
"l_geometry"]
diff --git a/python/tests/geopandas/test_geodataframe.py
b/python/tests/geopandas/test_geodataframe.py
index 4857435946..cceaa25af9 100644
--- a/python/tests/geopandas/test_geodataframe.py
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -19,7 +19,14 @@ import tempfile
from shapely.geometry import (
Point,
+ LineString,
Polygon,
+ GeometryCollection,
+ MultiPoint,
+ MultiLineString,
+ MultiPolygon,
+ LinearRing,
+ box,
)
import shapely
@@ -51,7 +58,8 @@ class TestDataframe(TestGeopandasBase):
],
)
def test_constructor(self, obj):
- sgpd_df = GeoDataFrame(obj)
+ with self.ps_allow_diff_frames():
+ sgpd_df = GeoDataFrame(obj)
check_geodataframe(sgpd_df)
@pytest.mark.parametrize(
@@ -320,7 +328,6 @@ class TestDataframe(TestGeopandasBase):
def test_buffer(self):
# Create a GeoDataFrame with geometries to test buffer operation
- from shapely.geometry import Polygon, Point
# Create input geometries
point = Point(0, 0)
@@ -353,6 +360,109 @@ class TestDataframe(TestGeopandasBase):
# Check that square buffer area is greater than original (1.0)
assert areas[1] > 1.0
+ def test_to_parquet(self):
+ pass
+
+ def test_from_arrow(self):
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+
+ table = pa.table({"a": [0, 1, 2], "b": [0.1, 0.2, 0.3]})
+ with pytest.raises(ValueError, match="No geometry column found"):
+ GeoDataFrame.from_arrow(table)
+
+ gdf = gpd.GeoDataFrame(
+ {
+ "col": [1, 2, 3, 4],
+ "geometry": [
+ LineString([(0, 0), (1, 1)]),
+ box(0, 0, 10, 10),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ Point(1, 1),
+ ],
+ }
+ )
+
+ result = GeoDataFrame.from_arrow(gdf.to_arrow())
+ self.check_sgpd_df_equals_gpd_df(result, gdf)
+
+ gdf = gpd.GeoDataFrame(
+ {
+ "col": ["a", "b", "c", "d"],
+ "geometry": [
+ Point(1, 1),
+ Polygon(),
+ LineString([(0, 0), (1, 1)]),
+ None,
+ ],
+ }
+ )
+
+ result = GeoDataFrame.from_arrow(gdf.to_arrow())
+
+ self.check_sgpd_df_equals_gpd_df(result, gdf)
+
+ def test_to_json(self):
+ import json
+
+ d = {"col1": ["name1", "name2"], "geometry": [Point(1, 2), Point(2,
1)]}
+
+ # Currently, adding the crs information later requires us to join
across partitions
+ with self.ps_allow_diff_frames():
+ gdf = GeoDataFrame(d, crs="EPSG:3857")
+
+ result = gdf.to_json()
+
+ obj = json.loads(result)
+ assert obj["type"] == "FeatureCollection"
+ assert obj["features"][0]["geometry"]["type"] == "Point"
+ assert obj["features"][0]["geometry"]["coordinates"] == [1.0, 2.0]
+ assert obj["features"][1]["geometry"]["type"] == "Point"
+ assert obj["features"][1]["geometry"]["coordinates"] == [2.0, 1.0]
+ assert obj["crs"]["type"] == "name"
+ assert obj["crs"]["properties"]["name"] == "urn:ogc:def:crs:EPSG::3857"
+
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", \
+"properties": {"col1": "name1"}, "geometry": {"type": "Point", "coordinates":
[1.0,\
+ 2.0]}}, {"id": "1", "type": "Feature", "properties": {"col1": "name2"},
"geometry"\
+: {"type": "Point", "coordinates": [2.0, 1.0]}}], "crs": {"type": "name",
"properti\
+es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
+ assert result == expected, f"Expected {expected}, but got {result}"
+
+ def test_to_arrow(self):
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+ from geopandas.testing import assert_geodataframe_equal
+
+ data = {"col1": ["name1", "name2"], "geometry": [Point(1, 2), Point(2,
1)]}
+
+ # Ensure index is not preserved for index=False
+ sgpd_df = GeoDataFrame(data, index=pd.Index([1, 2]))
+ result = pa.table(sgpd_df.to_arrow(index=False))
+
+ expected = gpd.GeoDataFrame(data)
+
+ # Ensure we can read it from using geopandas
+ gpd_df = gpd.GeoDataFrame.from_arrow(result)
+ assert_geodataframe_equal(gpd_df, expected)
+
+ # Ensure we can read it using sedona geopandas
+ sgpd_df = GeoDataFrame.from_arrow(result)
+ self.check_sgpd_df_equals_gpd_df(sgpd_df, expected)
+
+ # Ensure index is preserved for index=True
+ sgpd_df = GeoDataFrame(data, index=pd.Index([1, 2]))
+ result = pa.table(sgpd_df.to_arrow(index=True))
+
+ expected = gpd.GeoDataFrame(data, pd.Index([1, 2]))
+
+ gpd_df = gpd.GeoDataFrame.from_arrow(result)
+ assert_geodataframe_equal(gpd_df, expected)
+
# -----------------------------------------------------------------------------
# # Utils
diff --git a/python/tests/geopandas/test_geopandas_base.py
b/python/tests/geopandas/test_geopandas_base.py
index a772b273a6..d30c3dbb5f 100644
--- a/python/tests/geopandas/test_geopandas_base.py
+++ b/python/tests/geopandas/test_geopandas_base.py
@@ -47,9 +47,10 @@ class TestGeopandasBase(TestBase):
# TODO chore: rename to check_sgpd_series_equals_gpd_series and change the
names in the geoseries tests
@classmethod
def check_sgpd_equals_gpd(cls, actual: GeoSeries, expected: gpd.GeoSeries):
- assert isinstance(actual, GeoSeries)
- assert isinstance(expected, gpd.GeoSeries)
+ assert isinstance(actual, GeoSeries), "result is not a sgpd.GeoSeries"
+ assert isinstance(expected, gpd.GeoSeries), "expected is not a
gpd.GeoSeries"
sgpd_result = actual.to_geopandas()
+ assert len(sgpd_result) == len(expected), "results are of different
lengths"
for a, e in zip(sgpd_result, expected):
if a is None or e is None:
assert a is None and e is None
@@ -65,27 +66,39 @@ class TestGeopandasBase(TestBase):
def check_sgpd_df_equals_gpd_df(
cls, actual: GeoDataFrame, expected: gpd.GeoDataFrame
):
- assert isinstance(actual, GeoDataFrame)
- assert isinstance(expected, gpd.GeoDataFrame)
+ assert isinstance(actual, GeoDataFrame), "result is not a sgpd.GeoDataFrame"
+ assert isinstance(
+ expected, gpd.GeoDataFrame
+ ), "expected is not a gpd.GeoDataFrame"
assert len(actual.columns) == len(expected.columns)
for col_name in actual.keys():
actual_series, expected_series = actual[col_name], expected[col_name]
if isinstance(actual_series, GeoSeries):
- assert isinstance(actual_series, GeoSeries)
+ assert isinstance(
+ actual_series, GeoSeries
+ ), f"result[{col_name}] series is not a sgpd.GeoSeries"
# original geopandas does not guarantee a GeoSeries will be returned, so convert it here
expected_series = gpd.GeoSeries(expected_series)
cls.check_sgpd_equals_gpd(actual_series, expected_series)
else:
- assert isinstance(actual_series, ps.Series)
- assert isinstance(expected_series, pd.Series)
+ assert isinstance(
+ actual_series, ps.Series
+ ), f"result[{col_name}] series is not a ps.Series"
+ assert isinstance(
+ expected_series, pd.Series
+ ), f"expected[{col_name}] series is not a pd.Series"
cls.check_pd_series_equal(actual_series, expected_series)
@classmethod
def check_pd_series_equal(cls, actual: ps.Series, expected: pd.Series):
- assert isinstance(actual, ps.Series)
- assert isinstance(expected, pd.Series)
+ assert isinstance(actual, ps.Series), "result series is not a ps.Series"
+ assert isinstance(expected, pd.Series), "expected series is not a pd.Series"
assert_series_equal(actual.to_pandas(), expected)
+ @classmethod
+ def contains_any_geom_collection(cls, geoms) -> bool:
+ return any(isinstance(g, GeometryCollection) for g in geoms)
+
@contextmanager
def ps_allow_diff_frames(self):
"""
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 66d6b75d11..99b0fb3039 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -180,7 +180,20 @@ class TestGeoSeries(TestGeopandasBase):
pass
def test_from_arrow(self):
- pass
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+
+ table = pa.table({"a": [0, 1, 2], "b": [0.1, 0.2, 0.3]})
+ with pytest.raises(ValueError, match="No GeoArrow geometry field found"):
+ GeoSeries.from_arrow(table["a"].chunk(0))
+
+ gpd_series = gpd.GeoSeries(
+ [Point(1, 1), Polygon(), LineString([(0, 0), (1, 1)]), None]
+ )
+ result = sgpd.GeoSeries.from_arrow(gpd_series.to_arrow())
+ self.check_sgpd_equals_gpd(result, gpd_series)
def test_to_file(self):
pass
@@ -349,7 +362,34 @@ class TestGeoSeries(TestGeopandasBase):
sgpd.GeoSeries([Polygon([(0, 90), (1, 90), (2, 90)])]).estimate_utm_crs()
def test_to_json(self):
- pass
+ s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+
+ # TODO: optimize this away
+ with self.ps_allow_diff_frames():
+ result = s.to_json()
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "pr\
+operties": {}, "geometry": {"type": "Point", "coordinates": [1.0, 1.0]},
"bbox": [1.0,\
+ 1.0, 1.0, 1.0]}, {"id": "1", "type": "Feature", "properties": {}, "geometry":
{"type"\
+: "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id":
"2", "typ\
+e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates":
[3.0, 3.\
+0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.to_json(show_bbox=True)
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "properties": {}, "geometry": {"type": "Point",
"coordinates": [1.0, 1.0]}, "bbox": [1.0, 1.0, 1.0, 1.0]}, {"id": "1", "type":
"Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [2.0,
2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id": "2", "type": "Feature",
"properties": {}, "geometry": {"type": "Point", "coordinates": [3.0, 3.0]},
"bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1 [...]
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.to_json(drop_id=True)
+ expected = '{"type": "FeatureCollection", "features": [{"type": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [1.0, 1.0]}, "bbox": [1.0, 1.0, 1.0, 1.0]}, {"type": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"type": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3.0, 3.0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.set_crs("EPSG:3857").to_json(to_wgs84=True)
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "properties": {}, "geometry": {"type": "Point",
"coordinates": [8.983152841195214e-06, 8.983152841195177e-06]}, "bbox":
[8.983152841195214e-06, 8.983152841195177e-06, 8.983152841195214e-06,
8.983152841195177e-06]}, {"id": "1", "type": "Feature", "properties": {},
"geometry": {"type": "Point", "coordinates": [1.7966305682390428e-05,
1.7966305682390134e-05]}, "bbox": [1.7966305682390428e-05, 1 [...]
+ assert result == expected
def test_to_wkb(self):
if parse_version(shapely.__version__) < parse_version("2.0.0"):
@@ -421,7 +461,24 @@ class TestGeoSeries(TestGeopandasBase):
self.check_pd_series_equal(result, expected)
def test_to_arrow(self):
- pass
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+
+ gser = GeoSeries([Point(1, 2), Point(2, 1)])
+ # TODO: optimize this away
+ with self.ps_allow_diff_frames():
+ arrow_array = gser.to_arrow()
+ result = pa.array(arrow_array)
+
+ expected = [
+ "0101000000000000000000F03F0000000000000040",
+ "01010000000000000000000040000000000000F03F",
+ ]
+ expected = pa.array([bytes.fromhex(x) for x in expected], type=pa.binary())
+
+ assert result.equals(expected)
def test_clip(self):
pass
diff --git a/python/tests/geopandas/test_match_geopandas_dataframe.py b/python/tests/geopandas/test_match_geopandas_dataframe.py
index 4dd7714c98..8b80f9c7dc 100644
--- a/python/tests/geopandas/test_match_geopandas_dataframe.py
+++ b/python/tests/geopandas/test_match_geopandas_dataframe.py
@@ -176,3 +176,120 @@ class TestMatchGeopandasDataFrame(TestGeopandasBase):
sgpd_df = sgpd_df.rename_geometry("name1")
gpd_df = gpd_df.rename_geometry("name2")
assert sgpd_df.geometry.name != gpd_df.geometry.name
+
+ def test_to_json(self):
+ tests = [
+ {
+ "a": [1, 2, 3],
+ "b": ["4", "5", "6"],
+ "geometry": [
+ Point(1, 2),
+ Point(2, 1),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ],
+ },
+ {
+ "a": [1, 2, 3],
+ "b": ["4", "5", "6"],
+ "geometry": [
+ LineString([(0, 0), (1, 1)]),
+ GeometryCollection(Point()),
+ Point(2, 1),
+ ],
+ },
+ {
+ "a": [1, 2, 3],
+ "b": ["4", "5", "6"],
+ "geometry": [Polygon(), Point(1, 2), None],
+ },
+ ]
+
+ for data in tests:
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = GeoDataFrame(data).to_json()
+ gpd_result = gpd.GeoDataFrame(data).to_json()
+ assert sgpd_result == gpd_result
+
+ # test different json args
+ data = {
+ "a": [1, 2, 3],
+ "b": [4, 5, 6],
+ "geometry": [Point(1, 2), Point(2, 1), LineString([(0, 0), (1, 1)])],
+ }
+ tests = [
+ {"na": "drop"},
+ {"na": "keep"},
+ {"show_bbox": True},
+ {"drop_id": True},
+ {"to_wgs84": True},
+ {"na": "drop", "show_bbox": True, "drop_id": True, "to_wgs84": True},
+ ]
+ for kwargs in tests:
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = GeoDataFrame(data, crs="EPSG:3857").to_json(**kwargs)
+ gpd_result = gpd.GeoDataFrame(data, crs="EPSG:3857").to_json(**kwargs)
+ assert sgpd_result == gpd_result
+
+ def test_from_arrow(self):
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ gdf = gpd.GeoDataFrame(
+ {
+ "ints": [1, 2, 3, 4],
+ "strings": ["a", "b", "c", "d"],
+ "bools": [True, False, True, False],
+ "geometry": [
+ Point(0, 1),
+ LineString([(0, 0), (1, 1)]),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ Point(1, 1),
+ ],
+ }
+ )
+
+ # TODO: optimize this away
+ with self.ps_allow_diff_frames():
+ sgpd_result = GeoDataFrame.from_arrow(gdf.to_arrow())
+ gpd_result = gpd.GeoDataFrame.from_arrow(gdf.to_arrow())
+ self.check_sgpd_df_equals_gpd_df(sgpd_result, gpd_result)
+
+ def test_to_arrow(self):
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+ import pandas as pd
+
+ data = {
+ "a": [1, 2, 3],
+ "b": [4, 5, 6],
+ "geometry": [Point(1, 2), Point(2, 1), LineString([(0, 0), (1, 1)])],
+ }
+
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = pa.table(GeoDataFrame(data).to_arrow(index=False))
+ gpd_result = pa.table(gpd.GeoDataFrame(data).to_arrow(index=False))
+
+ assert sgpd_result.equals(gpd_result)
+
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = pa.table(
+ GeoDataFrame(
+ data, index=pd.RangeIndex(start=0, stop=3, step=1)
+ ).to_arrow(index=True)
+ )
+ gpd_result = pa.table(
+ gpd.GeoDataFrame(
+ data, index=pd.RangeIndex(start=0, stop=3, step=1)
+ ).to_arrow(index=True)
+ )
+
+ assert sgpd_result.equals(gpd_result)
+
+ # Note: Results for not specifying index=True or index=False for to_arrow is expected to be different
+ # from geopandas. See the to_arrow docstring for more details.
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index df4641d54b..0b6ff9e00f 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -282,7 +282,15 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
pass
def test_from_arrow(self):
- pass
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ for _, geom in self.geoms:
+ gpd_series = gpd.GeoSeries(geom)
+ gpd_result = gpd.GeoSeries.from_arrow(gpd_series.to_arrow())
+
+ sgpd_result = GeoSeries.from_arrow(gpd_series.to_arrow())
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_to_file(self):
pass
@@ -371,7 +379,10 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
assert sgpd_result == gpd_result
def test_to_json(self):
- pass
+ for _, geom in self.geoms:
+ sgpd_result = GeoSeries(geom).to_json()
+ gpd_result = gpd.GeoSeries(geom).to_json()
+ assert sgpd_result == gpd_result
def test_to_wkb(self):
for _, geom in self.geoms:
@@ -395,7 +406,15 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_to_arrow(self):
- pass
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+
+ for _, geom in self.geoms:
+ sgpd_result = pa.array(GeoSeries(geom).to_arrow())
+ gpd_result = pa.array(gpd.GeoSeries(geom).to_arrow())
+ assert sgpd_result == gpd_result
def test_clip(self):
pass