This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 475c1687f9 [GH-2104] Geopandas: Implement `to_json`, `to_arrow`,
`from_arrow` (#2105)
475c1687f9 is described below
commit 475c1687f997719c3fe787046f9063a7fcebc3d7
Author: Peter Nguyen <[email protected]>
AuthorDate: Tue Jul 22 23:52:17 2025 -0700
[GH-2104] Geopandas: Implement `to_json`, `to_arrow`, `from_arrow` (#2105)
* Implement to_json and to_arrow
* Implement them for GeoSeries too
* Fix StopIteration error to_geopandas() in test_boundary
* Skip to_arrow for geopandas versions < 1.0.0, and add note in doc
* fix skip tests
* Skip using get_srid for shapely version < 2.0.0
* Add from_arrow to both series and dataframe
* Remove geoarrow dependency
* Update python/sedona/geopandas/geoseries.py
Co-authored-by: Copilot <[email protected]>
* Remove debug comments
Co-authored-by: Copilot <[email protected]>
* Remove geometry collection skip in from_arrow
* Fix tests after merge by selecting as EWKB in sjoin.py
* Refactor to use self.name instead of first_geom column
* Add a complex df test
* Refactor series.py to use spark df api instead of spark sql
* Fix after merge
* Clean up
* Remove tuple type annotation
* More cleanup
* Fix align in fillna
* Add missing default geometry column logic to fix to_json
* Fix tests after merge
* Empty commit to retrigger ci
* Remove null first geoms to avoid spark bug
* Add back _to_geoframe
* Cast to object in series and fix test
* empty commit to retrigger ci
---------
Co-authored-by: Copilot <[email protected]>
---
python/sedona/geopandas/geodataframe.py | 380 ++++++++++++++++++++-
python/sedona/geopandas/geoseries.py | 191 ++++++++++-
python/sedona/geopandas/tools/sjoin.py | 3 +-
python/tests/geopandas/test_geodataframe.py | 114 ++++++-
python/tests/geopandas/test_geopandas_base.py | 31 +-
python/tests/geopandas/test_geoseries.py | 63 +++-
.../geopandas/test_match_geopandas_dataframe.py | 117 +++++++
.../tests/geopandas/test_match_geopandas_series.py | 25 +-
8 files changed, 887 insertions(+), 37 deletions(-)
diff --git a/python/sedona/geopandas/geodataframe.py
b/python/sedona/geopandas/geodataframe.py
index 60282a7c7f..6edf0bb741 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -19,6 +19,8 @@ from __future__ import annotations
from typing import Any, Literal, Callable, Union
import typing
+import os
+import shapely
import warnings
import numpy as np
import shapely
@@ -412,6 +414,9 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
if isinstance(data, GeoDataFrame):
if data._safe_get_crs() is None:
data.crs = crs
+
+ super().__init__(data, index=index, columns=columns, dtype=dtype,
copy=copy)
+
elif isinstance(data, GeoSeries):
if data.crs is None:
data.crs = crs
@@ -469,11 +474,41 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
copy=copy,
)
+ # Set geometry column name
if isinstance(data, (GeoDataFrame, gpd.GeoDataFrame)):
self._geometry_column_name = data._geometry_column_name
if crs is not None and data.crs != crs:
raise ValueError(crs_mismatch_error)
+ if geometry is None and "geometry" in self.columns:
+
+ if (self.columns == "geometry").sum() > 1:
+ raise ValueError(
+ "GeoDataFrame does not support multiple columns "
+ "using the geometry column name 'geometry'."
+ )
+
+ geometry: pspd.Series = self["geometry"]
+ if isinstance(geometry, sgpd.GeoSeries):
+ geom_crs = geometry.crs
+ if geom_crs is None:
+ if crs is not None:
+ geometry.set_crs(crs, inplace=True)
+ self.set_geometry(geometry, inplace=True)
+ else:
+ if crs is not None and geom_crs != crs:
+ raise ValueError(crs_mismatch_error)
+
+ # No need to call set_geometry() here since it's already part of
the df, just set the name
+ self._geometry_column_name = "geometry"
+
+ if geometry is None and crs:
+ raise ValueError(
+ "Assigning CRS to a GeoDataFrame without a geometry column is
not "
+ "supported. Supply geometry using the 'geometry=' keyword
argument, "
+ "or by providing a DataFrame with column name 'geometry'",
+ )
+
#
============================================================================
# GEOMETRY COLUMN MANAGEMENT
#
============================================================================
@@ -842,23 +877,33 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
sdf = self._internal.spark_frame.selectExpr(*select_expressions)
return GeoDataFrame(sdf)
- def to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
- # Implementation of the abstract method
- raise NotImplementedError(
- _not_implemented_error(
- "to_geopandas",
- "Converts to GeoPandas GeoDataFrame by collecting all data to
driver.",
- )
- )
+ def to_geopandas(self) -> gpd.GeoDataFrame:
+ """
+ Note: Unlike in pandas and geopandas, Sedona will always return a
general Index.
+ This differs from pandas and geopandas, which will return a RangeIndex
by default.
- def _to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
- # Implementation of the abstract method
- raise NotImplementedError(
- _not_implemented_error(
- "_to_geopandas",
- "Internal method for GeoPandas conversion without logging
warnings.",
- )
+ e.g pd.Index([0, 1, 2]) instead of pd.RangeIndex(start=0, stop=3,
step=1)
+ """
+ from pyspark.pandas.utils import log_advice
+
+ log_advice(
+ "`to_geopandas` loads all data into the driver's memory. "
+ "It should only be used if the resulting geopandas GeoSeries is
expected to be small."
)
+ return self._to_geopandas()
+
+ def _to_geopandas(self) -> gpd.GeoDataFrame:
+ pd_df = self._internal.to_pandas_frame
+
+ for col_name in pd_df.columns:
+ series: pspd.Series = self[col_name]
+ if isinstance(series, sgpd.GeoSeries):
+ # Use _to_geopandas instead of to_geopandas to avoid logging
extra warnings
+ pd_df[col_name] = series._to_geopandas()
+ else:
+ pd_df[col_name] = series.to_pandas()
+
+ return gpd.GeoDataFrame(pd_df, geometry=self._geometry_column_name)
@property
def sindex(self) -> SpatialIndex | None:
@@ -952,8 +997,311 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
return
self.geometry.crs = value
+ @classmethod
+ def from_dict(
+ cls,
+ data: dict,
+ geometry=None,
+ crs: Any | None = None,
+ **kwargs,
+ ) -> GeoDataFrame:
+ raise NotImplementedError("from_dict() is not implemented yet.")
+
+ @classmethod
+ def from_file(cls, filename: os.PathLike | typing.IO, **kwargs) ->
GeoDataFrame:
+ raise NotImplementedError("from_file() is not implemented yet.")
+
+ @classmethod
+ def from_features(
+ cls, features, crs: Any | None = None, columns: Iterable[str] | None =
None
+ ) -> GeoDataFrame:
+ raise NotImplementedError("from_features() is not implemented yet.")
+
+ @classmethod
+ def from_postgis(
+ cls,
+ sql: str | sqlalchemy.text,
+ con,
+ geom_col: str = "geom",
+ crs: Any | None = None,
+ index_col: str | list[str] | None = None,
+ coerce_float: bool = True,
+ parse_dates: list | dict | None = None,
+ params: list | tuple | dict | None = None,
+ chunksize: int | None = None,
+ ) -> GeoDataFrame:
+ raise NotImplementedError("from_postgis() is not implemented yet.")
+
+ @classmethod
+ def from_arrow(
+ cls, table, geometry: str | None = None, to_pandas_kwargs: dict | None
= None
+ ):
+ """
+ Construct a GeoDataFrame from a Arrow table object based on GeoArrow
+ extension types.
+
+ See https://geoarrow.org/ for details on the GeoArrow specification.
+
+ This functions accepts any tabular Arrow object implementing
+ the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
+ or ``__arrow_c_stream__`` method).
+
+ .. _Arrow PyCapsule Protocol:
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+ .. versionadded:: 1.0
+
+ Parameters
+ ----------
+ table : pyarrow.Table or Arrow-compatible table
+ Any tabular object implementing the Arrow PyCapsule Protocol
+ (i.e. has an ``__arrow_c_array__`` or ``__arrow_c_stream__``
+ method). This table should have at least one column with a
+ geoarrow geometry type.
+ geometry : str, default None
+ The name of the geometry column to set as the active geometry
+ column. If None, the first geometry column found will be used.
+ to_pandas_kwargs : dict, optional
+ Arguments passed to the `pa.Table.to_pandas` method for
non-geometry
+ columns. This can be used to control the behavior of the
conversion of the
+ non-geometry columns to a pandas DataFrame. For example, you can
use this
+ to control the dtype conversion of the columns. By default, the
`to_pandas`
+ method is called with no additional arguments.
+
+ Returns
+ -------
+ GeoDataFrame
+
+ See Also
+ --------
+ GeoDataFrame.to_arrow
+ GeoSeries.from_arrow
+
+ Examples
+ --------
+
+ >>> from sedona.geopandas import GeoDataFrame
+ >>> import geoarrow.pyarrow as ga
+ >>> import pyarrow as pa
+ >>> table = pa.Table.from_arrays([
+ ... ga.as_geoarrow([None, "POLYGON ((0 0, 1 1, 0 1, 0 0))",
"LINESTRING (0 0, -1 1, 0 -1)"]),
+ ... pa.array([1, 2, 3]),
+ ... pa.array(["a", "b", "c"]),
+ ... ], names=["geometry", "id", "value"])
+ >>> gdf = GeoDataFrame.from_arrow(table)
+ >>> gdf
+ geometry id value
+ 0 None 1 a
+ 1 POLYGON ((0 0, 1 1, 0 1, 0 0)) 2 b
+ 2 LINESTRING (0 0, -1 1, 0 -1) 3 c
+ """
+ if to_pandas_kwargs is None:
+ to_pandas_kwargs = {}
+
+ gpd_df = gpd.GeoDataFrame.from_arrow(
+ table, geometry=geometry, **to_pandas_kwargs
+ )
+ return GeoDataFrame(gpd_df)
+
+ def to_json(
+ self,
+ na: Literal["null", "drop", "keep"] = "null",
+ show_bbox: bool = False,
+ drop_id: bool = False,
+ to_wgs84: bool = False,
+ **kwargs,
+ ) -> str:
+ """
+ Returns a GeoJSON representation of the ``GeoDataFrame`` as a string.
+ Parameters
+ ----------
+ na : {'null', 'drop', 'keep'}, default 'null'
+ Indicates how to output missing (NaN) values in the GeoDataFrame.
+ See below.
+ show_bbox : bool, optional, default: False
+ Include bbox (bounds) in the geojson
+ drop_id : bool, default: False
+ Whether to retain the index of the GeoDataFrame as the id property
+ in the generated GeoJSON. Default is False, but may want True
+ if the index is just arbitrary row numbers.
+ to_wgs84: bool, optional, default: False
+ If the CRS is set on the active geometry column it is exported as
+ WGS84 (EPSG:4326) to meet the `2016 GeoJSON specification
+ <https://tools.ietf.org/html/rfc7946>`_.
+ Set to True to force re-projection and set to False to ignore CRS.
False by
+ default.
+ Notes
+ -----
+ The remaining *kwargs* are passed to json.dumps().
+ Missing (NaN) values in the GeoDataFrame can be represented as follows:
+ - ``null``: output the missing entries as JSON null.
+ - ``drop``: remove the property from the feature. This applies to each
+ feature individually so that features may have different properties.
+ - ``keep``: output the missing entries as NaN.
+ If the GeoDataFrame has a defined CRS, its definition will be included
+ in the output unless it is equal to WGS84 (default GeoJSON CRS) or not
+ possible to represent in the URN OGC format, or unless
``to_wgs84=True``
+ is specified.
+ Examples
+ --------
+ >>> from sedona.geopandas import GeoDataFrame
+ >>> from shapely.geometry import Point
+ >>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2),
Point(2, 1)]}
+ >>> gdf = GeoDataFrame(d, crs="EPSG:3857")
+ >>> gdf
+ col1 geometry
+ 0 name1 POINT (1 2)
+ 1 name2 POINT (2 1)
+ >>> gdf.to_json()
+ '{"type": "FeatureCollection", "features": [{"id": "0", "type":
"Feature", \
+"properties": {"col1": "name1"}, "geometry": {"type": "Point", "coordinates":
[1.0,\
+ 2.0]}}, {"id": "1", "type": "Feature", "properties": {"col1": "name2"},
"geometry"\
+: {"type": "Point", "coordinates": [2.0, 1.0]}}], "crs": {"type": "name",
"properti\
+es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
+ Alternatively, you can write GeoJSON to file:
+ >>> gdf.to_file(path, driver="GeoJSON") # doctest: +SKIP
+ See also
+ --------
+ GeoDataFrame.to_file : write GeoDataFrame to file
+ """
+ # Because this function returns the geojson string in memory,
+ # we simply rely on geopandas's implementation.
+ # Additionally, spark doesn't seem to have a straight forward way to
get the string
+ # without writing to a file first by using
sdf.write.format("geojson").save(path, **kwargs)
+ # return self.to_geopandas().to_json(na, show_bbox, drop_id, to_wgs84,
**kwargs)
+ # ST_AsGeoJSON() works only for one column
+ result = self.to_geopandas()
+ return result.to_json(na, show_bbox, drop_id, to_wgs84, **kwargs)
+
+ @property
+ def __geo_interface__(self) -> dict:
+ raise NotImplementedError("__geo_interface__() is not implemented
yet.")
+
+ def iterfeatures(
+ self, na: str = "null", show_bbox: bool = False, drop_id: bool = False
+ ) -> typing.Generator[dict]:
+ raise NotImplementedError("iterfeatures() is not implemented yet.")
+
+ def to_geo_dict(
+ self, na: str | None = "null", show_bbox: bool = False, drop_id: bool
= False
+ ) -> dict:
+ raise NotImplementedError("to_geo_dict() is not implemented yet.")
+
+ def to_wkb(self, hex: bool = False, **kwargs) -> pd.DataFrame:
+ raise NotImplementedError("to_wkb() is not implemented yet.")
+
+ def to_wkt(self, **kwargs) -> pd.DataFrame:
+ raise NotImplementedError("to_wkt() is not implemented yet.")
+
+ def to_arrow(
+ self,
+ *,
+ index: bool | None = None,
+ geometry_encoding="WKB",
+ interleaved: bool = True,
+ include_z: bool | None = None,
+ ):
+ """Encode a GeoDataFrame to GeoArrow format.
+ See https://geoarrow.org/ for details on the GeoArrow specification.
+ This function returns a generic Arrow data object implementing
+ the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_stream__``
+ method). This object can then be consumed by your Arrow implementation
+ of choice that supports this protocol.
+ .. _Arrow PyCapsule Protocol:
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+ Note: Requires geopandas versions >= 1.0.0 to use with Sedona.
+
+ Parameters
+ ----------
+ index : bool, default None
+ If ``True``, always include the dataframe's index(es) as columns
+ in the file output.
+ If ``False``, the index(es) will not be written to the file.
+ If ``None``, the index(ex) will be included as columns in the file
+ output except `RangeIndex` which is stored as metadata only.
+
+ Note: Unlike in geopandas, ``None`` will include the index in the
column because Sedona always
+ converts `RangeIndex` into a general `Index`.
+
+ geometry_encoding : {'WKB', 'geoarrow' }, default 'WKB'
+ The GeoArrow encoding to use for the data conversion.
+ interleaved : bool, default True
+ Only relevant for 'geoarrow' encoding. If True, the geometries'
+ coordinates are interleaved in a single fixed size list array.
+ If False, the coordinates are stored as separate arrays in a
+ struct type.
+ include_z : bool, default None
+ Only relevant for 'geoarrow' encoding (for WKB, the dimensionality
+ of the individual geometries is preserved).
+ If False, return 2D geometries. If True, include the third
dimension
+ in the output (if a geometry has no third dimension, the
z-coordinates
+ will be NaN). By default, will infer the dimensionality from the
+ input geometries. Note that this inference can be unreliable with
+ empty geometries (for a guaranteed result, it is recommended to
+ specify the keyword).
+ Returns
+ -------
+ ArrowTable
+ A generic Arrow table object with geometry columns encoded to
+ GeoArrow.
+ Examples
+ --------
+ >>> from sedona.geopandas import GeoDataFrame
+ >>> from shapely.geometry import Point
+ >>> data = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2),
Point(2, 1)]}
+ >>> gdf = GeoDataFrame(data)
+ >>> gdf
+ col1 geometry
+ 0 name1 POINT (1 2)
+ 1 name2 POINT (2 1)
+ >>> arrow_table = gdf.to_arrow(index=False)
+ >>> arrow_table
+ <geopandas.io._geoarrow.ArrowTable object at ...>
+ The returned data object needs to be consumed by a library implementing
+ the Arrow PyCapsule Protocol. For example, wrapping the data as a
+ pyarrow.Table (requires pyarrow >= 14.0):
+ >>> import pyarrow as pa
+ >>> table = pa.table(arrow_table)
+ >>> table
+ pyarrow.Table
+ col1: string
+ geometry: binary
+ ----
+ col1: [["name1","name2"]]
+ geometry: [[0101000000000000000000F03F0000000000000040,\
+01010000000000000000000040000000000000F03F]]
+ """
+ # Because this function returns the arrow table in memory, we simply
rely on geopandas's implementation.
+ # This also returns a geopandas specific data type, which can be
converted to an actual pyarrow table,
+ # so there is no direct Sedona equivalent. This way we also get all of
the arguments implemented for free.
+ return self.to_geopandas().to_arrow(
+ index=index,
+ geometry_encoding=geometry_encoding,
+ interleaved=interleaved,
+ include_z=include_z,
+ )
+
+ def to_feather(
+ self,
+ path,
+ index: bool | None = None,
+ compression: str | None = None,
+ schema_version=None,
+ **kwargs,
+ ):
+ raise NotImplementedError("to_feather() is not implemented yet.")
+
+ def to_file(
+ self,
+ filename: str,
+ driver: str | None = None,
+ schema: dict | None = None,
+ index: bool | None = None,
+ **kwargs,
+ ):
+ raise NotImplementedError("to_file() is not implemented yet.")
+
@property
- def geom_type(self):
+ def geom_type(self) -> str:
# Implementation of the abstract method
raise NotImplementedError(
_not_implemented_error(
diff --git a/python/sedona/geopandas/geoseries.py
b/python/sedona/geopandas/geoseries.py
index 434c0ba315..5e50d3787c 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -46,6 +46,7 @@ from sedona.geopandas._typing import Label
from sedona.geopandas.base import GeoFrame
from sedona.geopandas.geodataframe import GeoDataFrame
from sedona.geopandas.sindex import SpatialIndex
+from packaging.version import parse as parse_version
from pyspark.pandas.internal import (
SPARK_DEFAULT_INDEX_NAME, # __index_level_0__
@@ -439,6 +440,8 @@ class GeoSeries(GeoFrame, pspd.Series):
fastpath=fastpath,
)
+ pd_series = pd_series.astype(object)
+
# initialize the parent class pyspark Series with the pandas Series
super().__init__(data=pd_series)
@@ -624,6 +627,10 @@ class GeoSeries(GeoFrame, pspd.Series):
curr_crs = self.crs
+ # If CRS is the same, do nothing
+ if curr_crs == crs:
+ return
+
if not allow_override and curr_crs is not None and not curr_crs == crs:
raise ValueError(
"The GeoSeries already has a CRS which is not equal to the
passed "
@@ -3621,7 +3628,55 @@ class GeoSeries(GeoFrame, pspd.Series):
@classmethod
def from_arrow(cls, arr, **kwargs) -> "GeoSeries":
- raise NotImplementedError("GeoSeries.from_arrow() is not implemented
yet.")
+ """
+ Construct a GeoSeries from a Arrow array object with a GeoArrow
+ extension type.
+
+ See https://geoarrow.org/ for details on the GeoArrow specification.
+
+ This functions accepts any Arrow array object implementing
+ the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
+ method).
+
+ .. _Arrow PyCapsule Protocol:
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+ Note: Requires geopandas versions >= 1.0.0 to use with Sedona.
+
+ Parameters
+ ----------
+ arr : pyarrow.Array, Arrow array
+ Any array object implementing the Arrow PyCapsule Protocol
+ (i.e. has an ``__arrow_c_array__`` or ``__arrow_c_stream__``
+ method). The type of the array should be one of the
+ geoarrow geometry types.
+ **kwargs
+ Other parameters passed to the GeoSeries constructor.
+
+ Returns
+ -------
+ GeoSeries
+
+ See Also
+ --------
+ GeoSeries.to_arrow
+ GeoDataFrame.from_arrow
+
+ Examples
+ --------
+
+ >>> from sedona.geopandas import GeoSeries
+ >>> import geoarrow.pyarrow as ga
+ >>> array = ga.as_geoarrow([None, "POLYGON ((0 0, 1 1, 0 1, 0 0))",
"LINESTRING (0 0, -1 1, 0 -1)"])
+ >>> geoseries = GeoSeries.from_arrow(array)
+ >>> geoseries
+ 0 None
+ 1 POLYGON ((0 0, 1 1, 0 1, 0 0))
+ 2 LINESTRING (0 0, -1 1, 0 -1)
+ dtype: geometry
+
+ """
+ gpd_series = gpd.GeoSeries.from_arrow(arr, **kwargs)
+ return GeoSeries(gpd_series)
@classmethod
def _create_from_select(
@@ -4210,7 +4265,56 @@ class GeoSeries(GeoFrame, pspd.Series):
to_wgs84: bool = False,
**kwargs,
) -> str:
- raise NotImplementedError("GeoSeries.to_json() is not implemented
yet.")
+ """
+ Returns a GeoJSON string representation of the GeoSeries.
+
+ Parameters
+ ----------
+ show_bbox : bool, optional, default: True
+ Include bbox (bounds) in the geojson
+ drop_id : bool, default: False
+ Whether to retain the index of the GeoSeries as the id property
+ in the generated GeoJSON. Default is False, but may want True
+ if the index is just arbitrary row numbers.
+ to_wgs84: bool, optional, default: False
+ If the CRS is set on the active geometry column it is exported as
+ WGS84 (EPSG:4326) to meet the `2016 GeoJSON specification
+ <https://tools.ietf.org/html/rfc7946>`_.
+ Set to True to force re-projection and set to False to ignore CRS.
False by
+ default.
+
+ *kwargs* that will be passed to json.dumps().
+
+ Returns
+ -------
+ JSON string
+
+ Examples
+ --------
+ >>> from sedona.geopandas import GeoSeries
+ >>> from shapely.geometry import Point
+ >>> s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+ >>> s
+ 0 POINT (1 1)
+ 1 POINT (2 2)
+ 2 POINT (3 3)
+ dtype: geometry
+
+ >>> s.to_json()
+ '{"type": "FeatureCollection", "features": [{"id": "0", "type":
"Feature", "pr\
+operties": {}, "geometry": {"type": "Point", "coordinates": [1.0, 1.0]},
"bbox": [1.0,\
+ 1.0, 1.0, 1.0]}, {"id": "1", "type": "Feature", "properties": {}, "geometry":
{"type"\
+: "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id":
"2", "typ\
+e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates":
[3.0, 3.\
+0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+
+ See Also
+ --------
+ GeoSeries.to_file : write GeoSeries to file
+ """
+ return self._to_geoframe(name="geometry").to_json(
+ na="null", show_bbox=show_bbox, drop_id=drop_id,
to_wgs84=to_wgs84, **kwargs
+ )
def to_wkb(self, hex: bool = False, **kwargs) -> pspd.Series:
"""
@@ -4313,7 +4417,79 @@ class GeoSeries(GeoFrame, pspd.Series):
)
def to_arrow(self, geometry_encoding="WKB", interleaved=True,
include_z=None):
- raise NotImplementedError("GeoSeries.to_arrow() is not implemented
yet.")
+ """Encode a GeoSeries to GeoArrow format.
+
+ See https://geoarrow.org/ for details on the GeoArrow specification.
+
+ This functions returns a generic Arrow array object implementing
+ the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__``
+ method). This object can then be consumed by your Arrow implementation
+ of choice that supports this protocol.
+
+ .. _Arrow PyCapsule Protocol:
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+ Note: Requires geopandas versions >= 1.0.0 to use with Sedona.
+
+ Parameters
+ ----------
+ geometry_encoding : {'WKB', 'geoarrow' }, default 'WKB'
+ The GeoArrow encoding to use for the data conversion.
+ interleaved : bool, default True
+ Only relevant for 'geoarrow' encoding. If True, the geometries'
+ coordinates are interleaved in a single fixed size list array.
+ If False, the coordinates are stored as separate arrays in a
+ struct type.
+ include_z : bool, default None
+ Only relevant for 'geoarrow' encoding (for WKB, the dimensionality
+ of the individual geometries is preserved).
+ If False, return 2D geometries. If True, include the third
dimension
+ in the output (if a geometry has no third dimension, the
z-coordinates
+ will be NaN). By default, will infer the dimensionality from the
+ input geometries. Note that this inference can be unreliable with
+ empty geometries (for a guaranteed result, it is recommended to
+ specify the keyword).
+
+ Returns
+ -------
+ GeoArrowArray
+ A generic Arrow array object with geometry data encoded to
GeoArrow.
+
+ Examples
+ --------
+ >>> from sedona.geopandas import GeoSeries
+ >>> from shapely.geometry import Point
+ >>> gser = GeoSeries([Point(1, 2), Point(2, 1)])
+ >>> gser
+ 0 POINT (1 2)
+ 1 POINT (2 1)
+ dtype: geometry
+
+ >>> arrow_array = gser.to_arrow()
+ >>> arrow_array
+ <geopandas.io._geoarrow.GeoArrowArray object at ...>
+
+ The returned array object needs to be consumed by a library
implementing
+ the Arrow PyCapsule Protocol. For example, wrapping the data as a
+ pyarrow.Array (requires pyarrow >= 14.0):
+
+ >>> import pyarrow as pa
+ >>> array = pa.array(arrow_array)
+ >>> array
+ <pyarrow.lib.BinaryArray object at ...>
+ [
+ 0101000000000000000000F03F0000000000000040,
+ 01010000000000000000000040000000000000F03F
+ ]
+
+ """
+ # Because this function returns the arrow array in memory, we simply
rely on geopandas's implementation.
+ # This also returns a geopandas specific data type, which can be
converted to an actual pyarrow array,
+ # so there is no direct Sedona equivalent. This way we also get all of
the arguments implemented for free.
+ return self.to_geopandas().to_arrow(
+ geometry_encoding=geometry_encoding,
+ interleaved=interleaved,
+ include_z=include_z,
+ )
def clip(self, mask, keep_geom_type: bool = False, sort=False) ->
"GeoSeries":
raise NotImplementedError(
@@ -4349,6 +4525,15 @@ class GeoSeries(GeoFrame, pspd.Series):
else:
return value, False
+ def _to_geoframe(self, name=None):
+ if name is not None:
+ renamed = self.rename(name)
+ elif self._column_label is None:
+ renamed = self.rename("geometry")
+ else:
+ renamed = self
+ return GeoDataFrame(pspd.DataFrame(renamed._internal))
+
# -----------------------------------------------------------------------------
# # Utils
diff --git a/python/sedona/geopandas/tools/sjoin.py
b/python/sedona/geopandas/tools/sjoin.py
index e0dcd8921c..a62529475e 100644
--- a/python/sedona/geopandas/tools/sjoin.py
+++ b/python/sedona/geopandas/tools/sjoin.py
@@ -166,7 +166,8 @@ def _frame_join(
final_columns = []
# Add geometry column (always from left for geopandas compatibility)
- final_columns.append("l_geometry as geometry")
+ # Currently, Sedona stores geometries in EWKB format
+ final_columns.append("ST_AsEWKB(l_geometry) as geometry")
# Add other columns with suffix handling
left_data_cols = [col for col in left_geo_df.columns if col !=
"l_geometry"]
diff --git a/python/tests/geopandas/test_geodataframe.py
b/python/tests/geopandas/test_geodataframe.py
index 4857435946..cceaa25af9 100644
--- a/python/tests/geopandas/test_geodataframe.py
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -19,7 +19,14 @@ import tempfile
from shapely.geometry import (
Point,
+ LineString,
Polygon,
+ GeometryCollection,
+ MultiPoint,
+ MultiLineString,
+ MultiPolygon,
+ LinearRing,
+ box,
)
import shapely
@@ -51,7 +58,8 @@ class TestDataframe(TestGeopandasBase):
],
)
def test_constructor(self, obj):
- sgpd_df = GeoDataFrame(obj)
+ with self.ps_allow_diff_frames():
+ sgpd_df = GeoDataFrame(obj)
check_geodataframe(sgpd_df)
@pytest.mark.parametrize(
@@ -320,7 +328,6 @@ class TestDataframe(TestGeopandasBase):
def test_buffer(self):
# Create a GeoDataFrame with geometries to test buffer operation
- from shapely.geometry import Polygon, Point
# Create input geometries
point = Point(0, 0)
@@ -353,6 +360,109 @@ class TestDataframe(TestGeopandasBase):
# Check that square buffer area is greater than original (1.0)
assert areas[1] > 1.0
+ def test_to_parquet(self):
+ pass
+
+ def test_from_arrow(self):
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+
+ table = pa.table({"a": [0, 1, 2], "b": [0.1, 0.2, 0.3]})
+ with pytest.raises(ValueError, match="No geometry column found"):
+ GeoDataFrame.from_arrow(table)
+
+ gdf = gpd.GeoDataFrame(
+ {
+ "col": [1, 2, 3, 4],
+ "geometry": [
+ LineString([(0, 0), (1, 1)]),
+ box(0, 0, 10, 10),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ Point(1, 1),
+ ],
+ }
+ )
+
+ result = GeoDataFrame.from_arrow(gdf.to_arrow())
+ self.check_sgpd_df_equals_gpd_df(result, gdf)
+
+ gdf = gpd.GeoDataFrame(
+ {
+ "col": ["a", "b", "c", "d"],
+ "geometry": [
+ Point(1, 1),
+ Polygon(),
+ LineString([(0, 0), (1, 1)]),
+ None,
+ ],
+ }
+ )
+
+ result = GeoDataFrame.from_arrow(gdf.to_arrow())
+
+ self.check_sgpd_df_equals_gpd_df(result, gdf)
+
+ def test_to_json(self):
+ import json
+
+ d = {"col1": ["name1", "name2"], "geometry": [Point(1, 2), Point(2,
1)]}
+
+ # Currently, adding the crs information later requires us to join
across partitions
+ with self.ps_allow_diff_frames():
+ gdf = GeoDataFrame(d, crs="EPSG:3857")
+
+ result = gdf.to_json()
+
+ obj = json.loads(result)
+ assert obj["type"] == "FeatureCollection"
+ assert obj["features"][0]["geometry"]["type"] == "Point"
+ assert obj["features"][0]["geometry"]["coordinates"] == [1.0, 2.0]
+ assert obj["features"][1]["geometry"]["type"] == "Point"
+ assert obj["features"][1]["geometry"]["coordinates"] == [2.0, 1.0]
+ assert obj["crs"]["type"] == "name"
+ assert obj["crs"]["properties"]["name"] == "urn:ogc:def:crs:EPSG::3857"
+
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", \
+"properties": {"col1": "name1"}, "geometry": {"type": "Point", "coordinates":
[1.0,\
+ 2.0]}}, {"id": "1", "type": "Feature", "properties": {"col1": "name2"},
"geometry"\
+: {"type": "Point", "coordinates": [2.0, 1.0]}}], "crs": {"type": "name",
"properti\
+es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
+ assert result == expected, f"Expected {expected}, but got {result}"
+
+ def test_to_arrow(self):
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+ from geopandas.testing import assert_geodataframe_equal
+
+ data = {"col1": ["name1", "name2"], "geometry": [Point(1, 2), Point(2,
1)]}
+
+ # Ensure index is not preserved for index=False
+ sgpd_df = GeoDataFrame(data, index=pd.Index([1, 2]))
+ result = pa.table(sgpd_df.to_arrow(index=False))
+
+ expected = gpd.GeoDataFrame(data)
+
+ # Ensure we can read it from using geopandas
+ gpd_df = gpd.GeoDataFrame.from_arrow(result)
+ assert_geodataframe_equal(gpd_df, expected)
+
+ # Ensure we can read it using sedona geopandas
+ sgpd_df = GeoDataFrame.from_arrow(result)
+ self.check_sgpd_df_equals_gpd_df(sgpd_df, expected)
+
+ # Ensure index is preserved for index=True
+ sgpd_df = GeoDataFrame(data, index=pd.Index([1, 2]))
+ result = pa.table(sgpd_df.to_arrow(index=True))
+
+ expected = gpd.GeoDataFrame(data, pd.Index([1, 2]))
+
+ gpd_df = gpd.GeoDataFrame.from_arrow(result)
+ assert_geodataframe_equal(gpd_df, expected)
+
# -----------------------------------------------------------------------------
# # Utils
diff --git a/python/tests/geopandas/test_geopandas_base.py
b/python/tests/geopandas/test_geopandas_base.py
index a772b273a6..d30c3dbb5f 100644
--- a/python/tests/geopandas/test_geopandas_base.py
+++ b/python/tests/geopandas/test_geopandas_base.py
@@ -47,9 +47,10 @@ class TestGeopandasBase(TestBase):
# TODO chore: rename to check_sgpd_series_equals_gpd_series and change the
names in the geoseries tests
@classmethod
def check_sgpd_equals_gpd(cls, actual: GeoSeries, expected: gpd.GeoSeries):
- assert isinstance(actual, GeoSeries)
- assert isinstance(expected, gpd.GeoSeries)
+ assert isinstance(actual, GeoSeries), "result is not a sgpd.GeoSeries"
+ assert isinstance(expected, gpd.GeoSeries), "expected is not a
gpd.GeoSeries"
sgpd_result = actual.to_geopandas()
+ assert len(sgpd_result) == len(expected), "results are of different
lengths"
for a, e in zip(sgpd_result, expected):
if a is None or e is None:
assert a is None and e is None
@@ -65,27 +66,39 @@ class TestGeopandasBase(TestBase):
def check_sgpd_df_equals_gpd_df(
cls, actual: GeoDataFrame, expected: gpd.GeoDataFrame
):
- assert isinstance(actual, GeoDataFrame)
- assert isinstance(expected, gpd.GeoDataFrame)
+ assert isinstance(actual, GeoDataFrame), "result is not a sgpd.GeoDataFrame"
+ assert isinstance(
+ expected, gpd.GeoDataFrame
+ ), "expected is not a gpd.GeoDataFrame"
assert len(actual.columns) == len(expected.columns)
for col_name in actual.keys():
actual_series, expected_series = actual[col_name], expected[col_name]
if isinstance(actual_series, GeoSeries):
- assert isinstance(actual_series, GeoSeries)
+ assert isinstance(
+ actual_series, GeoSeries
+ ), f"result[{col_name}] series is not a sgpd.GeoSeries"
# original geopandas does not guarantee a GeoSeries will be returned, so convert it here
expected_series = gpd.GeoSeries(expected_series)
cls.check_sgpd_equals_gpd(actual_series, expected_series)
else:
- assert isinstance(actual_series, ps.Series)
- assert isinstance(expected_series, pd.Series)
+ assert isinstance(
+ actual_series, ps.Series
+ ), f"result[{col_name}] series is not a ps.Series"
+ assert isinstance(
+ expected_series, pd.Series
+ ), f"expected[{col_name}] series is not a pd.Series"
cls.check_pd_series_equal(actual_series, expected_series)
@classmethod
def check_pd_series_equal(cls, actual: ps.Series, expected: pd.Series):
- assert isinstance(actual, ps.Series)
- assert isinstance(expected, pd.Series)
+ assert isinstance(actual, ps.Series), "result series is not a ps.Series"
+ assert isinstance(expected, pd.Series), "expected series is not a pd.Series"
assert_series_equal(actual.to_pandas(), expected)
+ @classmethod
+ def contains_any_geom_collection(cls, geoms) -> bool:
+ return any(isinstance(g, GeometryCollection) for g in geoms)
+
@contextmanager
def ps_allow_diff_frames(self):
"""
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 66d6b75d11..99b0fb3039 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -180,7 +180,20 @@ class TestGeoSeries(TestGeopandasBase):
pass
def test_from_arrow(self):
- pass
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+
+ table = pa.table({"a": [0, 1, 2], "b": [0.1, 0.2, 0.3]})
+ with pytest.raises(ValueError, match="No GeoArrow geometry field found"):
+ GeoSeries.from_arrow(table["a"].chunk(0))
+
+ gpd_series = gpd.GeoSeries(
+ [Point(1, 1), Polygon(), LineString([(0, 0), (1, 1)]), None]
+ )
+ result = sgpd.GeoSeries.from_arrow(gpd_series.to_arrow())
+ self.check_sgpd_equals_gpd(result, gpd_series)
def test_to_file(self):
pass
@@ -349,7 +362,34 @@ class TestGeoSeries(TestGeopandasBase):
sgpd.GeoSeries([Polygon([(0, 90), (1, 90), (2, 90)])]).estimate_utm_crs()
def test_to_json(self):
- pass
+ s = GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
+
+ # TODO: optimize this away
+ with self.ps_allow_diff_frames():
+ result = s.to_json()
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "pr\
+operties": {}, "geometry": {"type": "Point", "coordinates": [1.0, 1.0]},
"bbox": [1.0,\
+ 1.0, 1.0, 1.0]}, {"id": "1", "type": "Feature", "properties": {}, "geometry":
{"type"\
+: "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id":
"2", "typ\
+e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates":
[3.0, 3.\
+0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.to_json(show_bbox=True)
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "properties": {}, "geometry": {"type": "Point",
"coordinates": [1.0, 1.0]}, "bbox": [1.0, 1.0, 1.0, 1.0]}, {"id": "1", "type":
"Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [2.0,
2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"id": "2", "type": "Feature",
"properties": {}, "geometry": {"type": "Point", "coordinates": [3.0, 3.0]},
"bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1 [...]
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.to_json(drop_id=True)
+ expected = '{"type": "FeatureCollection", "features": [{"type": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [1.0, 1.0]}, "bbox": [1.0, 1.0, 1.0, 1.0]}, {"type": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [2.0, 2.0]}, "bbox": [2.0, 2.0, 2.0, 2.0]}, {"type": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3.0, 3.0]}, "bbox": [3.0, 3.0, 3.0, 3.0]}], "bbox": [1.0, 1.0, 3.0, 3.0]}'
+ assert result == expected
+
+ with self.ps_allow_diff_frames():
+ result = s.set_crs("EPSG:3857").to_json(to_wgs84=True)
+ expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", "properties": {}, "geometry": {"type": "Point",
"coordinates": [8.983152841195214e-06, 8.983152841195177e-06]}, "bbox":
[8.983152841195214e-06, 8.983152841195177e-06, 8.983152841195214e-06,
8.983152841195177e-06]}, {"id": "1", "type": "Feature", "properties": {},
"geometry": {"type": "Point", "coordinates": [1.7966305682390428e-05,
1.7966305682390134e-05]}, "bbox": [1.7966305682390428e-05, 1 [...]
+ assert result == expected
def test_to_wkb(self):
if parse_version(shapely.__version__) < parse_version("2.0.0"):
@@ -421,7 +461,24 @@ class TestGeoSeries(TestGeopandasBase):
self.check_pd_series_equal(result, expected)
def test_to_arrow(self):
- pass
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+
+ gser = GeoSeries([Point(1, 2), Point(2, 1)])
+ # TODO: optimize this away
+ with self.ps_allow_diff_frames():
+ arrow_array = gser.to_arrow()
+ result = pa.array(arrow_array)
+
+ expected = [
+ "0101000000000000000000F03F0000000000000040",
+ "01010000000000000000000040000000000000F03F",
+ ]
+ expected = pa.array([bytes.fromhex(x) for x in expected], type=pa.binary())
+
+ assert result.equals(expected)
def test_clip(self):
pass
diff --git a/python/tests/geopandas/test_match_geopandas_dataframe.py b/python/tests/geopandas/test_match_geopandas_dataframe.py
index 4dd7714c98..8b80f9c7dc 100644
--- a/python/tests/geopandas/test_match_geopandas_dataframe.py
+++ b/python/tests/geopandas/test_match_geopandas_dataframe.py
@@ -176,3 +176,120 @@ class TestMatchGeopandasDataFrame(TestGeopandasBase):
sgpd_df = sgpd_df.rename_geometry("name1")
gpd_df = gpd_df.rename_geometry("name2")
assert sgpd_df.geometry.name != gpd_df.geometry.name
+
+ def test_to_json(self):
+ tests = [
+ {
+ "a": [1, 2, 3],
+ "b": ["4", "5", "6"],
+ "geometry": [
+ Point(1, 2),
+ Point(2, 1),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ ],
+ },
+ {
+ "a": [1, 2, 3],
+ "b": ["4", "5", "6"],
+ "geometry": [
+ LineString([(0, 0), (1, 1)]),
+ GeometryCollection(Point()),
+ Point(2, 1),
+ ],
+ },
+ {
+ "a": [1, 2, 3],
+ "b": ["4", "5", "6"],
+ "geometry": [Polygon(), Point(1, 2), None],
+ },
+ ]
+
+ for data in tests:
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = GeoDataFrame(data).to_json()
+ gpd_result = gpd.GeoDataFrame(data).to_json()
+ assert sgpd_result == gpd_result
+
+ # test different json args
+ data = {
+ "a": [1, 2, 3],
+ "b": [4, 5, 6],
+ "geometry": [Point(1, 2), Point(2, 1), LineString([(0, 0), (1, 1)])],
+ }
+ tests = [
+ {"na": "drop"},
+ {"na": "keep"},
+ {"show_bbox": True},
+ {"drop_id": True},
+ {"to_wgs84": True},
+ {"na": "drop", "show_bbox": True, "drop_id": True, "to_wgs84": True},
+ ]
+ for kwargs in tests:
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = GeoDataFrame(data, crs="EPSG:3857").to_json(**kwargs)
+ gpd_result = gpd.GeoDataFrame(data, crs="EPSG:3857").to_json(**kwargs)
+ assert sgpd_result == gpd_result
+
+ def test_from_arrow(self):
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ gdf = gpd.GeoDataFrame(
+ {
+ "ints": [1, 2, 3, 4],
+ "strings": ["a", "b", "c", "d"],
+ "bools": [True, False, True, False],
+ "geometry": [
+ Point(0, 1),
+ LineString([(0, 0), (1, 1)]),
+ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+ Point(1, 1),
+ ],
+ }
+ )
+
+ # TODO: optimize this away
+ with self.ps_allow_diff_frames():
+ sgpd_result = GeoDataFrame.from_arrow(gdf.to_arrow())
+ gpd_result = gpd.GeoDataFrame.from_arrow(gdf.to_arrow())
+ self.check_sgpd_df_equals_gpd_df(sgpd_result, gpd_result)
+
+ def test_to_arrow(self):
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+ import pandas as pd
+
+ data = {
+ "a": [1, 2, 3],
+ "b": [4, 5, 6],
+ "geometry": [Point(1, 2), Point(2, 1), LineString([(0, 0), (1, 1)])],
+ }
+
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = pa.table(GeoDataFrame(data).to_arrow(index=False))
+ gpd_result = pa.table(gpd.GeoDataFrame(data).to_arrow(index=False))
+
+ assert sgpd_result.equals(gpd_result)
+
+ # TODO: Try to optimize this with self.ps_allow_diff_frames() away
+ with self.ps_allow_diff_frames():
+ sgpd_result = pa.table(
+ GeoDataFrame(
+ data, index=pd.RangeIndex(start=0, stop=3, step=1)
+ ).to_arrow(index=True)
+ )
+ gpd_result = pa.table(
+ gpd.GeoDataFrame(
+ data, index=pd.RangeIndex(start=0, stop=3, step=1)
+ ).to_arrow(index=True)
+ )
+
+ assert sgpd_result.equals(gpd_result)
+
+ # Note: Results for not specifying index=True or index=False for to_arrow is expected to be different
+ # from geopandas. See the to_arrow docstring for more details.
diff --git a/python/tests/geopandas/test_match_geopandas_series.py b/python/tests/geopandas/test_match_geopandas_series.py
index df4641d54b..0b6ff9e00f 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -282,7 +282,15 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
pass
def test_from_arrow(self):
- pass
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ for _, geom in self.geoms:
+ gpd_series = gpd.GeoSeries(geom)
+ gpd_result = gpd.GeoSeries.from_arrow(gpd_series.to_arrow())
+
+ sgpd_result = GeoSeries.from_arrow(gpd_series.to_arrow())
+ self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_to_file(self):
pass
@@ -371,7 +379,10 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
assert sgpd_result == gpd_result
def test_to_json(self):
- pass
+ for _, geom in self.geoms:
+ sgpd_result = GeoSeries(geom).to_json()
+ gpd_result = gpd.GeoSeries(geom).to_json()
+ assert sgpd_result == gpd_result
def test_to_wkb(self):
for _, geom in self.geoms:
@@ -395,7 +406,15 @@ class TestMatchGeopandasSeries(TestGeopandasBase):
self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
def test_to_arrow(self):
- pass
+ if parse_version(gpd.__version__) < parse_version("1.0.0"):
+ return
+
+ import pyarrow as pa
+
+ for _, geom in self.geoms:
+ sgpd_result = pa.array(GeoSeries(geom).to_arrow())
+ gpd_result = pa.array(gpd.GeoSeries(geom).to_arrow())
+ assert sgpd_result == gpd_result
def test_clip(self):
pass