(sedona) branch master updated: [GH-2192] Geopandas: Implement `plot()` for GeoDataFrame and GeoSeries + `read_parquet` (#2193)

jiayu Tue, 29 Jul 2025 20:04:45 -0700

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git



The following commit(s) were added to refs/heads/master by this push:
     new f4719ed2a2 [GH-2192] Geopandas: Implement `plot()` for GeoDataFrame and GeoSeries + `read_parquet` (#2193)
f4719ed2a2 is described below

commit f4719ed2a2382675b843b7d12a29764c2cfe3b80
Author: Peter Nguyen <[email protected]>
AuthorDate: Tue Jul 29 20:04:35 2025 -0700

    [GH-2192] Geopandas: Implement `plot()` for GeoDataFrame and GeoSeries + `read_parquet` (#2193)
    
    * Copy original test_plotting into sedona's repo
    
    * Fix up test_plotting to work
    
    * Implement plot() in geoseries and geodataframe
    
    * Remove gpd compat import
    
    * Just set GEOS_GE_390=True in test_plotting
    
    * skip tests for shapely < 2.0
    
    * Skip tests if gpd < 1.0
    
    * Add read_parquet
    
    * Skip plotting tests that fail on older versions
    
    * Add warning for unsupported params in read_parquet
    
    * Remove test_plotting.py
    
    * Add test_plot in test_geodataframe and test_series
---
 python/sedona/geopandas/__init__.py         |   2 +-
 python/sedona/geopandas/base.py             |  10 ++-
 python/sedona/geopandas/geodataframe.py     | 131 ++++++++++++++++++++++++++++
 python/sedona/geopandas/geoseries.py        |  50 +++++++++++
 python/sedona/geopandas/io.py               |  50 +++++++++++
 python/tests/geopandas/test_geodataframe.py |  17 +++-
 python/tests/geopandas/test_geoseries.py    |   4 +
 python/tests/geopandas/test_io.py           |   3 +-
 8 files changed, 262 insertions(+), 5 deletions(-)

diff --git a/python/sedona/geopandas/__init__.py b/python/sedona/geopandas/__init__.py
index 855f27d591..989f268c31 100644
--- a/python/sedona/geopandas/__init__.py
+++ b/python/sedona/geopandas/__init__.py
@@ -25,4 +25,4 @@ from sedona.geopandas.geodataframe import GeoDataFrame
 
 from sedona.geopandas.tools import sjoin
 
-from sedona.geopandas.io import read_file
+from sedona.geopandas.io import read_file, read_parquet
diff --git a/python/sedona/geopandas/base.py b/python/sedona/geopandas/base.py
index 3c874816b7..d7837bec8e 100644
--- a/python/sedona/geopandas/base.py
+++ b/python/sedona/geopandas/base.py
@@ -2288,8 +2288,14 @@ class GeoFrame(metaclass=ABCMeta):
             "simplify", self, tolerance, preserve_topology
         )
 
-    def sjoin(self, other, predicate="intersects", **kwargs):
-        raise NotImplementedError("This method is not implemented yet.")
+    @abstractmethod
+    def to_geopandas(self) -> Union[gpd.GeoSeries, gpd.GeoDataFrame]: ...
+
+    @abstractmethod
+    def plot(self, *args, **kwargs): ...
+
+    @abstractmethod
+    def sjoin(self, other, predicate="intersects", **kwargs): ...
 
 
 def _delegate_to_geometry_column(op, this, *args, **kwargs):
diff --git a/python/sedona/geopandas/geodataframe.py b/python/sedona/geopandas/geodataframe.py
index d3d0d21ce1..b2c3060c04 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -1184,6 +1184,137 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
             _not_implemented_error("type", "Returns numeric geometry type codes.")
         )
 
+    def plot(self, *args, **kwargs):
+        """
+        Plot a GeoDataFrame.
+
+        Generate a plot of a GeoDataFrame with matplotlib.  If a
+        column is specified, the plot coloring will be based on values
+        in that column.
+
+        Note: This method is not scalable and requires collecting all data to the driver.
+
+        Parameters
+        ----------
+        column : str, np.array, pd.Series, pd.Index (default None)
+            The name of the dataframe column, np.array, pd.Series, or pd.Index
+            to be plotted. If np.array, pd.Series, or pd.Index are used then it
+            must have same length as dataframe. Values are used to color the plot.
+            Ignored if `color` is also set.
+        kind: str
+            The kind of plots to produce. The default is to create a map ("geo").
+            Other supported kinds of plots from pandas:
+
+            - 'line' : line plot
+            - 'bar' : vertical bar plot
+            - 'barh' : horizontal bar plot
+            - 'hist' : histogram
+            - 'box' : BoxPlot
+            - 'kde' : Kernel Density Estimation plot
+            - 'density' : same as 'kde'
+            - 'area' : area plot
+            - 'pie' : pie plot
+            - 'scatter' : scatter plot
+            - 'hexbin' : hexbin plot.
+        cmap : str (default None)
+            The name of a colormap recognized by matplotlib.
+        color : str, np.array, pd.Series (default None)
+            If specified, all objects will be colored uniformly.
+        ax : matplotlib.pyplot.Artist (default None)
+            axes on which to draw the plot
+        cax : matplotlib.pyplot Artist (default None)
+            axes on which to draw the legend in case of color map.
+        categorical : bool (default False)
+            If False, cmap will reflect numerical values of the
+            column being plotted.  For non-numerical columns, this
+            will be set to True.
+        legend : bool (default False)
+            Plot a legend. Ignored if no `column` is given, or if `color` is given.
+        scheme : str (default None)
+            Name of a choropleth classification scheme (requires mapclassify).
+            A mapclassify.MapClassifier object will be used
+            under the hood. Supported are all schemes provided by mapclassify (e.g.
+            'BoxPlot', 'EqualInterval', 'FisherJenks', 'FisherJenksSampled',
+            'HeadTailBreaks', 'JenksCaspall', 'JenksCaspallForced',
+            'JenksCaspallSampled', 'MaxP', 'MaximumBreaks',
+            'NaturalBreaks', 'Quantiles', 'Percentiles', 'StdMean',
+            'UserDefined'). Arguments can be passed in classification_kwds.
+        k : int (default 5)
+            Number of classes (ignored if scheme is None)
+        vmin : None or float (default None)
+            Minimum value of cmap. If None, the minimum data value
+            in the column to be plotted is used.
+        vmax : None or float (default None)
+            Maximum value of cmap. If None, the maximum data value
+            in the column to be plotted is used.
+        markersize : str or float or sequence (default None)
+            Only applies to point geometries within a frame.
+            If a str, will use the values in the column of the frame specified
+            by markersize to set the size of markers. Otherwise can be a value
+            to apply to all points, or a sequence of the same length as the
+            number of points.
+        figsize : tuple of integers (default None)
+            Size of the resulting matplotlib.figure.Figure. If the argument
+            axes is given explicitly, figsize is ignored.
+        legend_kwds : dict (default None)
+            Keyword arguments to pass to :func:`matplotlib.pyplot.legend` or
+            :func:`matplotlib.pyplot.colorbar`.
+            Additional accepted keywords when `scheme` is specified:
+
+            fmt : string
+                A formatting specification for the bin edges of the classes in the
+                legend. For example, to have no decimals: ``{"fmt": "{:.0f}"}``.
+            labels : list-like
+                A list of legend labels to override the auto-generated labels.
+                Needs to have the same number of elements as the number of
+                classes (`k`).
+            interval : boolean (default False)
+                An option to control brackets from mapclassify legend.
+                If True, open/closed interval brackets are shown in the legend.
+        categories : list-like
+            Ordered list-like object of categories to be used for categorical plot.
+        classification_kwds : dict (default None)
+            Keyword arguments to pass to mapclassify
+        missing_kwds : dict (default None)
+            Keyword arguments specifying color options (as style_kwds)
+            to be passed on to geometries with missing values in addition to
+            or overwriting other style kwds. If None, geometries with missing
+            values are not plotted.
+        aspect : 'auto', 'equal', None or float (default 'auto')
+            Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if
+            however data are not projected (coordinates are long/lat), the aspect is by
+            default set to 1/cos(df_y * pi/180) with df_y the y coordinate of the middle of
+            the GeoDataFrame (the mean of the y range of bounding box) so that a long/lat
+            square appears square in the middle of the plot. This implies an
+            Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
+            also be set manually (float) as the ratio of y-unit to x-unit.
+        autolim : bool (default True)
+            Update axes data limits to contain the new geometries.
+        **style_kwds : dict
+            Style options to be passed on to the actual plot function, such
+            as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
+            ``alpha``.
+
+        Returns
+        -------
+        ax : matplotlib axes instance
+
+        Examples
+        --------
+        >>> import geodatasets
+        >>> df = geopandas.read_file(geodatasets.get_path("nybb"))
+        >>> df.head()  # doctest: +SKIP
+        BoroCode  ...                                           geometry
+        0         5  ...  MULTIPOLYGON (((970217.022 145643.332, 970227....
+        1         4  ...  MULTIPOLYGON (((1029606.077 156073.814, 102957...
+        2         3  ...  MULTIPOLYGON (((1021176.479 151374.797, 102100...
+        3         1  ...  MULTIPOLYGON (((981219.056 188655.316, 980940....
+        4         2  ...  MULTIPOLYGON (((1012821.806 229228.265, 101278...
+
+        >>> df.plot("BoroName", cmap="Set1")  # doctest: +SKIP
+        """
+        return self.to_geopandas().plot(*args, **kwargs)
+
     # ============================================================================
     # SPATIAL OPERATIONS
     # ============================================================================
diff --git a/python/sedona/geopandas/geoseries.py b/python/sedona/geopandas/geoseries.py
index 54bf9ecc6a..2075895681 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -1487,6 +1487,56 @@ class GeoSeries(GeoFrame, pspd.Series):
 
         return self._query_geometry_column(spark_expr)
 
+    def plot(self, *args, **kwargs):
+        """
+        Plot a GeoSeries.
+
+        Generate a plot of a GeoSeries geometry with matplotlib.
+
+        Note: This method is not scalable and requires collecting all data to the driver.
+
+        Parameters
+        ----------
+        s : Series
+            The GeoSeries to be plotted. Currently Polygon,
+            MultiPolygon, LineString, MultiLineString, Point and MultiPoint
+            geometries can be plotted.
+        cmap : str (default None)
+            The name of a colormap recognized by matplotlib. Any
+            colormap will work, but categorical colormaps are
+            generally recommended. Examples of useful discrete
+            colormaps include:
+
+                tab10, tab20, Accent, Dark2, Paired, Pastel1, Set1, Set2
+
+        color : str, np.array, pd.Series, List (default None)
+            If specified, all objects will be colored uniformly.
+        ax : matplotlib.pyplot.Artist (default None)
+            axes on which to draw the plot
+        figsize : pair of floats (default None)
+            Size of the resulting matplotlib.figure.Figure. If the argument
+            ax is given explicitly, figsize is ignored.
+        aspect : 'auto', 'equal', None or float (default 'auto')
+            Set aspect of axis. If 'auto', the default aspect for map plots is 'equal'; if
+            however data are not projected (coordinates are long/lat), the aspect is by
+            default set to 1/cos(s_y * pi/180) with s_y the y coordinate of the middle of
+            the GeoSeries (the mean of the y range of bounding box) so that a long/lat
+            square appears square in the middle of the plot. This implies an
+            Equirectangular projection. If None, the aspect of `ax` won't be changed. It can
+            also be set manually (float) as the ratio of y-unit to x-unit.
+        autolim : bool (default True)
+            Update axes data limits to contain the new geometries.
+        **style_kwds : dict
+            Color options to be passed on to the actual plot function, such
+            as ``edgecolor``, ``facecolor``, ``linewidth``, ``markersize``,
+            ``alpha``.
+
+        Returns
+        -------
+        ax : matplotlib axes instance
+        """
+        return self.to_geopandas().plot(*args, **kwargs)
+
     def sjoin(
         self,
         other,
diff --git a/python/sedona/geopandas/io.py b/python/sedona/geopandas/io.py
index 3b4d8bdf02..a9da89eef7 100644
--- a/python/sedona/geopandas/io.py
+++ b/python/sedona/geopandas/io.py
@@ -236,3 +236,53 @@ def read_file(filename: str, format: Union[str, None] = None, **kwargs):
 
     internal = InternalFrame(spark_frame=sdf, index_spark_columns=index_spark_columns)
     return GeoDataFrame(ps.DataFrame(internal))
+
+
+def read_parquet(
+    path,
+    columns=None,
+    storage_options=None,
+    bbox=None,
+    to_pandas_kwargs=None,
+    **kwargs,
+):
+    """
+    Load a Parquet object from the file path, returning a GeoDataFrame.
+
+    * if no geometry columns are read, this will raise a ``ValueError`` - you
+      should use the pandas `read_parquet` method instead.
+
+    If 'crs' key is not present in the GeoParquet metadata associated with the
+    Parquet object, it will default to "OGC:CRS84" according to the specification.
+
+    Parameters
+    ----------
+    path : str, path object
+    columns : list-like of strings, default=None
+        Not currently supported in Sedona
+    storage_options : dict, optional
+        Not currently supported in Sedona
+    bbox : tuple, optional
+        Not currently supported in Sedona
+    to_pandas_kwargs : dict, optional
+        Not currently supported in Sedona
+
+    Returns
+    -------
+    GeoDataFrame
+
+    Examples
+    --------
+    from sedona.geopandas import read_parquet
+    >>> df = read_parquet("data.parquet")  # doctest: +SKIP
+
+    Specifying columns to read:
+
+    >>> df = read_parquet(
+    ...     "data.parquet",
+    ... )  # doctest: +SKIP
+    """
+    if kwargs:
+        warnings.warn(f"The given arguments are not supported in Sedona: {kwargs}")
+
+    return read_file(path, format="geoparquet", **kwargs)
diff --git a/python/tests/geopandas/test_geodataframe.py b/python/tests/geopandas/test_geodataframe.py
index 6c7b7e7c11..abda5010c3 100644
--- a/python/tests/geopandas/test_geodataframe.py
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -45,7 +45,7 @@ from packaging.version import parse as parse_version
     parse_version(shapely.__version__) < parse_version("2.0.0"),
     reason=f"Tests require shapely>=2.0.0, but found v{shapely.__version__}",
 )
-class TestDataframe(TestGeopandasBase):
+class TestGeoDataFrame(TestGeopandasBase):
     @pytest.mark.parametrize(
         "obj",
         [
@@ -126,6 +126,21 @@ class TestDataframe(TestGeopandasBase):
         sgpd_df = sgpd.GeoDataFrame(obj)
         assert_frame_equal(pd_df, sgpd_df.to_pandas())
 
+    def test_plot(self):
+        # Just make sure it doesn't error
+        df = GeoDataFrame(
+            {
+                "value1": ["a", "b", "c"],
+                "geometry": [
+                    Point(0, 0),
+                    Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
+                    LineString([(0, 0), (1, 1)]),
+                ],
+                "value2": [1, 2, 3],
+            }
+        )
+        df.plot()
+
     def test_psdf(self):
         # this is to make sure the spark session works with pandas on spark api
         psdf = ps.DataFrame(
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 261ac304fb..d73545f08a 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -95,6 +95,10 @@ class TestGeoSeries(TestGeopandasBase):
         sgpd_series = GeoSeries(obj)
         assert isinstance(sgpd_series, sgpd.GeoSeries)
 
+    def test_plot(self):
+        # Just make sure it doesn't error
+        self.geoseries.plot()
+
     def test_area(self):
         result = self.geoseries.area.to_pandas()
         expected = pd.Series([0.0, 0.0, 5.23, 5.23])
diff --git a/python/tests/geopandas/test_io.py b/python/tests/geopandas/test_io.py
index 8e0ab68404..052216e15b 100644
--- a/python/tests/geopandas/test_io.py
+++ b/python/tests/geopandas/test_io.py
@@ -23,7 +23,7 @@ import pandas as pd
 import geopandas as gpd
 import pyspark.pandas as ps
 from functools import partial
-from sedona.geopandas import GeoDataFrame, GeoSeries, read_file
+from sedona.geopandas import GeoDataFrame, GeoSeries, read_file, read_parquet
 from tests import tests_resource
 from tests.geopandas.test_geopandas_base import TestGeopandasBase
 from shapely.geometry import (
@@ -102,6 +102,7 @@ class TestIO(TestGeopandasBase):
         [
             partial(GeoDataFrame.from_file, format="geoparquet"),
             partial(read_file, format="GeoParquet"),
+            read_parquet,
         ],
     )
     def test_read_geoparquet(self, read_func):

(sedona) branch master updated: [GH-2192] Geopandas: Implement `plot()` for GeoDataFrame and GeoSeries + `read_parquet` (#2193)

Reply via email to