This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 5cf9e607c2 [GH-2070] Geopandas.GeoSeries: Implement `get_geometry` 
(#2071)
5cf9e607c2 is described below

commit 5cf9e607c2819152863195f8bce383cf547bb5b9
Author: Peter Nguyen <[email protected]>
AuthorDate: Fri Jul 11 23:22:12 2025 -0700

    [GH-2070] Geopandas.GeoSeries: Implement `get_geometry` (#2071)
    
    * Implement get_geometry
    
    * Skip test for old versions
    
    * Support extra-negative indices
    
    * Skip test for shapely < 2.0.0
    
    * Update docs to mention the shapely < 2.0.0 behavior
---
 python/sedona/geopandas/geoseries.py               | 122 ++++++++++++++++++++-
 python/tests/geopandas/test_geoseries.py           |  45 +++++++-
 .../tests/geopandas/test_match_geopandas_series.py |  18 ++-
 3 files changed, 177 insertions(+), 8 deletions(-)

diff --git a/python/sedona/geopandas/geoseries.py 
b/python/sedona/geopandas/geoseries.py
index 06602b27ce..97e35038b3 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -935,9 +935,98 @@ class GeoSeries(GeoFrame, pspd.Series):
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
-    def get_geometry(self, index):
-        # Implementation of the abstract method
-        raise NotImplementedError("This method is not implemented yet.")
+    def get_geometry(self, index) -> "GeoSeries":
+        """Returns the n-th geometry from a collection of geometries 
(0-indexed).
+
+        If the index is non-negative, it returns the geometry at that index.
+        If the index is negative, it counts backward from the end of the 
collection (e.g., -1 returns the last geometry).
+        Returns None if the index is out of bounds.
+
+        Note: Simple geometries act as length-1 collections
+
+        Note: Using Shapely < 2.0 may lead to different results for empty 
simple geometries due to how
+        Shapely interprets them.
+
+        Parameters
+        ----------
+        index : int or array_like
+            Position of a geometry to be retrieved within its collection
+
+        Returns
+        -------
+        GeoSeries
+
+        Notes
+        -----
+        Simple geometries act as collections of length 1. Any out-of-range 
index value
+        returns None.
+
+        Examples
+        --------
+        >>> from shapely.geometry import Point, MultiPoint, GeometryCollection, Polygon
+        >>> s = geopandas.GeoSeries(
+        ...     [
+        ...         Point(0, 0),
+        ...         MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0)]),
+        ...         GeometryCollection(
+        ...             [MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0)]), 
Point(0, 1)]
+        ...         ),
+        ...         Polygon(),
+        ...         GeometryCollection(),
+        ...     ]
+        ... )
+        >>> s
+        0                                          POINT (0 0)
+        1              MULTIPOINT ((0 0), (1 1), (0 1), (1 0))
+        2    GEOMETRYCOLLECTION (MULTIPOINT ((0 0), (1 1), ...
+        3                                        POLYGON EMPTY
+        4                             GEOMETRYCOLLECTION EMPTY
+        dtype: geometry
+
+        >>> s.get_geometry(0)
+        0                                POINT (0 0)
+        1                                POINT (0 0)
+        2    MULTIPOINT ((0 0), (1 1), (0 1), (1 0))
+        3                              POLYGON EMPTY
+        4                                       None
+        dtype: geometry
+
+        >>> s.get_geometry(1)
+        0           None
+        1    POINT (1 1)
+        2    POINT (0 1)
+        3           None
+        4           None
+        dtype: geometry
+
+        >>> s.get_geometry(-1)
+        0      POINT (0 0)
+        1      POINT (1 0)
+        2      POINT (0 1)
+        3    POLYGON EMPTY
+        4             None
+        dtype: geometry
+
+        """
+
+        # Sedona errors on negative indexes, so we use a case statement to 
handle it ourselves
+        select = """
+        ST_GeometryN(
+            `L`,
+            CASE
+                WHEN ST_NumGeometries(`L`) + `R` < 0 THEN NULL
+                WHEN `R` < 0 THEN ST_NumGeometries(`L`) + `R`
+                ELSE `R`
+            END
+        )
+        """
+
+        return self._row_wise_operation(
+            select,
+            index,
+            align=False,
+            rename="get_geometry",
+        )
 
     @property
     def boundary(self):
@@ -1353,7 +1442,7 @@ class GeoSeries(GeoFrame, pspd.Series):
     def _row_wise_operation(
         self,
         select: str,
-        other: Union["GeoSeries", BaseGeometry],
+        other: Any,
         align: Union[bool, None],
         rename: str,
         returns_geom: bool = True,
@@ -1372,7 +1461,11 @@ class GeoSeries(GeoFrame, pspd.Series):
         if isinstance(other, BaseGeometry):
             other = GeoSeries([other] * len(self))
 
-        assert isinstance(other, GeoSeries), f"Invalid type for other: 
{type(other)}"
+        # e.g. an int input, which is broadcast to one value per row
+        if not isinstance(other, pspd.Series):
+            other = pspd.Series([other] * len(self))
+
+        assert isinstance(other, pspd.Series), f"Invalid type for other: 
{type(other)}"
 
         # This code assumes there is only one index (SPARK_DEFAULT_INDEX_NAME)
         # and would need to be updated if Sedona later supports multi-index
@@ -1385,7 +1478,7 @@ class GeoSeries(GeoFrame, pspd.Series):
             col(SPARK_DEFAULT_INDEX_NAME),
         )
         other_df = other._internal.spark_frame.select(
-            col(other.get_first_geometry_column()).alias("R"),
+            col(_get_first_column_name(other)).alias("R"),
             # for the right side, we only need the column that we are joining 
on
             col(index_col),
         )
@@ -2368,6 +2461,23 @@ class GeoSeries(GeoFrame, pspd.Series):
 # -----------------------------------------------------------------------------
 
 
+def _get_first_column_name(series: pspd.Series) -> str:
+    """
+    Get the first column name of a Series.
+
+    Parameters:
+    - series: The input Series.
+
+    Returns:
+    - str: The first column name of the Series.
+    """
+    return next(
+        field.name
+        for field in series._internal.spark_frame.schema.fields
+        if field.name not in (SPARK_DEFAULT_INDEX_NAME, 
NATURAL_ORDER_COLUMN_NAME)
+    )
+
+
 def _to_spark_pandas_df(ps_series: pspd.Series) -> pspd.DataFrame:
     return pspd.DataFrame(ps_series._psdf._internal)
 
diff --git a/python/tests/geopandas/test_geoseries.py 
b/python/tests/geopandas/test_geoseries.py
index 6aabf33f25..298b7b13ac 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import shapely
 import numpy as np
 import pytest
 import pandas as pd
@@ -446,7 +447,49 @@ class TestGeoSeries(TestBase):
         pass
 
     def test_get_geometry(self):
-        pass
+        # Shapely 1 seems to have a bug where Polygon() is incorrectly 
interpreted as a GeometryCollection
+        if shapely.__version__ < "2.0.0":
+            return
+
+        from shapely.geometry import MultiPoint
+
+        s = GeoSeries(
+            [
+                Point(0, 0),
+                MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0)]),
+                GeometryCollection(
+                    [MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0)]), Point(0, 1)]
+                ),
+                Polygon(),
+                GeometryCollection(),
+            ]
+        )
+
+        result = s.get_geometry(0)
+        expected = gpd.GeoSeries(
+            [
+                Point(0, 0),
+                Point(0, 0),
+                MultiPoint([(0, 0), (1, 1), (0, 1), (1, 0)]),
+                Polygon(),
+                None,
+            ]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        result = s.get_geometry(1)
+        expected = gpd.GeoSeries([None, Point(1, 1), Point(0, 1), None, None])
+        self.check_sgpd_equals_gpd(result, expected)
+
+        result = s.get_geometry(-1)
+        expected = gpd.GeoSeries(
+            [Point(0, 0), Point(1, 0), Point(0, 1), Polygon(), None]
+        )
+        self.check_sgpd_equals_gpd(result, expected)
+
+        result = s.get_geometry(2)
+        expected = gpd.GeoSeries([None, Point(0, 1), None, None, None])
+        self.check_sgpd_equals_gpd(result, expected)
 
     def test_boundary(self):
         pass
diff --git a/python/tests/geopandas/test_match_geopandas_series.py 
b/python/tests/geopandas/test_match_geopandas_series.py
index cbb61527c5..389597c651 100644
--- a/python/tests/geopandas/test_match_geopandas_series.py
+++ b/python/tests/geopandas/test_match_geopandas_series.py
@@ -38,6 +38,7 @@ from shapely.geometry import (
 from sedona.geopandas import GeoSeries
 from tests.test_base import TestBase
 import pyspark.pandas as ps
+from packaging.version import parse as parse_version
 
 
 class TestMatchGeopandasSeries(TestBase):
@@ -460,7 +461,22 @@ class TestMatchGeopandasSeries(TestBase):
         pass
 
     def test_get_geometry(self):
-        pass
+        if parse_version(gpd.__version__) < parse_version("1.0.0"):
+            return
+
+        for _, geom in self.geoms:
+            # test negative index, in-bounds index, and out of bounds index
+            for index in [-1, 0, len(geom) + 1]:
+                sgpd_result = GeoSeries(geom).get_geometry(index)
+                gpd_result = gpd.GeoSeries(geom).get_geometry(index)
+                self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
+
+        data = [GeometryCollection(), Polygon(), MultiPolygon()]
+
+        for idx in [-2, -1, 0, 1]:
+            sgpd_result = GeoSeries(data).get_geometry(idx)
+            gpd_result = gpd.GeoSeries(data).get_geometry(idx)
+            self.check_sgpd_equals_gpd(sgpd_result, gpd_result)
 
     def test_boundary(self):
         pass

Reply via email to