This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 9e031f5f03 [GH-2169] Clean up Geopandas Documentation after the
refactor (#2170)
9e031f5f03 is described below
commit 9e031f5f0372a3afd5b8a8b28c9b4b535a506c30
Author: Peter Nguyen <[email protected]>
AuthorDate: Mon Jul 28 19:03:34 2025 -0700
[GH-2169] Clean up Geopandas Documentation after the refactor (#2170)
* Delete geopandas/internal.py and remove it from the docs
* Fix: use sindex instead of geoindex for docs
* Remove geopandas base from the docs
* Delete the methods and attributes in the GeoDataFrame and GeoSeries top
level docstrings
* Use autoclass instead, add inherited-members to geoseries, and move
geoseries to top
* Move geoseries and geodataframe to a new sedona.geopandas.api.rst file
* Remove the empty 'Module contents' at the bottom
* Clean up unnecessary headers
* Document parent class methods in GeoSeries, while ignoring 'Series'
methods
* Fix: Move comment to outside of autoclass geoseries entry
* Fix docstrings in geoseries to work with sphinx
* Fix docstrings to avoid sphinx warnings in geodataframe.py and sjoin.py
* Comment out non-implemented functions in base.py to avoid them showing in
the docs
* Remove unnecessary part of to_file
* Fully fix the rest of GeoDataFrame docstrings
* Update python/sedona/geopandas/geoseries.py
Co-authored-by: Copilot <[email protected]>
---------
Co-authored-by: Copilot <[email protected]>
---
python/sedona/doc/sedona.geopandas.api.rst | 20 +++
python/sedona/doc/sedona.geopandas.rst | 58 +------
python/sedona/doc/sedona.geopandas.tools.rst | 14 --
python/sedona/geopandas/base.py | 157 ++++++++---------
python/sedona/geopandas/geodataframe.py | 249 +++++++++++++--------------
python/sedona/geopandas/geoseries.py | 225 +++++++++++-------------
python/sedona/geopandas/internal.py | 33 ----
python/sedona/geopandas/tools/sjoin.py | 3 -
8 files changed, 322 insertions(+), 437 deletions(-)
diff --git a/python/sedona/doc/sedona.geopandas.api.rst
b/python/sedona/doc/sedona.geopandas.api.rst
new file mode 100644
index 0000000000..93c2e87a62
--- /dev/null
+++ b/python/sedona/doc/sedona.geopandas.api.rst
@@ -0,0 +1,20 @@
+sedona.geopandas package
+========================
+
+.. Option: inherited-members: Series
+ Document all methods from the parent base 'GeoFrame' class, but ignore the
methods in 'Series' class
+.. autoclass:: sedona.geopandas.GeoSeries
+ :members:
+ :undoc-members:
+ :inherited-members: Series
+
+
+.. autoclass:: sedona.geopandas.GeoDataFrame
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+.. autoclass:: sedona.geopandas.sindex.SpatialIndex
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/python/sedona/doc/sedona.geopandas.rst
b/python/sedona/doc/sedona.geopandas.rst
index 3f212392e8..81ef43ee86 100644
--- a/python/sedona/doc/sedona.geopandas.rst
+++ b/python/sedona/doc/sedona.geopandas.rst
@@ -1,61 +1,9 @@
sedona.geopandas package
========================
-Subpackages
------------
-
.. toctree::
- :maxdepth: 4
-
- sedona.geopandas.tools
-
-Submodules
-----------
-
-sedona.geopandas.base module
-----------------------------
-
-.. automodule:: sedona.geopandas.base
- :members:
- :undoc-members:
- :show-inheritance:
-
-sedona.geopandas.geodataframe module
-------------------------------------
-
-.. automodule:: sedona.geopandas.geodataframe
- :members:
- :undoc-members:
- :show-inheritance:
+ :maxdepth: 2
-sedona.geopandas.geoindex module
---------------------------------
+ sedona.geopandas.api
-.. automodule:: sedona.geopandas.geoindex
- :members:
- :undoc-members:
- :show-inheritance:
-
-sedona.geopandas.geoseries module
----------------------------------
-
-.. automodule:: sedona.geopandas.geoseries
- :members:
- :undoc-members:
- :show-inheritance:
-
-sedona.geopandas.internal module
---------------------------------
-
-.. automodule:: sedona.geopandas.internal
- :members:
- :undoc-members:
- :show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: sedona.geopandas
- :members:
- :undoc-members:
- :show-inheritance:
+ sedona.geopandas.tools
diff --git a/python/sedona/doc/sedona.geopandas.tools.rst
b/python/sedona/doc/sedona.geopandas.tools.rst
index 1e5bf8e129..0d52c5f911 100644
--- a/python/sedona/doc/sedona.geopandas.tools.rst
+++ b/python/sedona/doc/sedona.geopandas.tools.rst
@@ -1,21 +1,7 @@
sedona.geopandas.tools package
==============================
-Submodules
-----------
-
-sedona.geopandas.tools.sjoin module
------------------------------------
-
.. automodule:: sedona.geopandas.tools.sjoin
:members:
:undoc-members:
:show-inheritance:
-
-Module contents
----------------
-
-.. automodule:: sedona.geopandas.tools
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/python/sedona/geopandas/base.py b/python/sedona/geopandas/base.py
index 0393d2fa63..4bd28df24a 100644
--- a/python/sedona/geopandas/base.py
+++ b/python/sedona/geopandas/base.py
@@ -47,18 +47,19 @@ class GeoFrame(metaclass=ABCMeta):
A base class for both GeoDataFrame and GeoSeries.
"""
- def _reduce_for_geostat_function(
- self,
- sfun: Callable[["GeoSeries"], Column],
- name: str,
- axis: Optional[Axis] = None,
- numeric_only: bool = True,
- skipna: bool = True,
- **kwargs: Any,
- ) -> Union["GeoSeries", Scalar]:
- raise NotImplementedError("This method is not implemented yet.")
+ # def _reduce_for_geostat_function(
+ # self,
+ # sfun: Callable[["GeoSeries"], Column],
+ # name: str,
+ # axis: Optional[Axis] = None,
+ # numeric_only: bool = True,
+ # skipna: bool = True,
+ # **kwargs: Any,
+ # ) -> Union["GeoSeries", Scalar]:
+ # raise NotImplementedError("This method is not implemented yet.")
@property
+ @abstractmethod
def sindex(self) -> "SpatialIndex":
"""
Returns a spatial index built from the geometries.
@@ -82,6 +83,7 @@ class GeoFrame(metaclass=ABCMeta):
# We pass in self.geometry here to use the active geometry column for
dataframe
return _delegate_to_geometry_column("sindex", self.geometry)
+ @abstractmethod
def copy(self: GeoFrameLike) -> GeoFrameLike:
raise NotImplementedError("This method is not implemented yet.")
@@ -134,6 +136,7 @@ class GeoFrame(metaclass=ABCMeta):
return _delegate_to_geometry_column("geom_type", self)
@property
+ @abstractmethod
def type(self):
raise NotImplementedError("This method is not implemented yet.")
@@ -281,14 +284,14 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("is_empty", self)
- def count_coordinates(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def count_coordinates(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def count_geometries(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def count_geometries(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def count_interior_rings(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def count_interior_rings(self):
+ # raise NotImplementedError("This method is not implemented yet.")
@property
def is_simple(self):
@@ -319,17 +322,17 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("is_simple", self)
- @property
- def is_ring(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # @property
+ # def is_ring(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- @property
- def is_ccw(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # @property
+ # def is_ccw(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- @property
- def is_closed(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # @property
+ # def is_closed(self):
+ # raise NotImplementedError("This method is not implemented yet.")
@property
def has_z(self):
@@ -363,8 +366,8 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("has_z", self)
- def get_precision(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def get_precision(self):
+ # raise NotImplementedError("This method is not implemented yet.")
def get_geometry(self, index):
"""Returns the n-th geometry from a collection of geometries
(0-indexed).
@@ -514,18 +517,18 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("centroid", self)
- def concave_hull(self, ratio=0.0, allow_holes=False):
- raise NotImplementedError("This method is not implemented yet.")
+ # def concave_hull(self, ratio=0.0, allow_holes=False):
+ # raise NotImplementedError("This method is not implemented yet.")
- @property
- def convex_hull(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # @property
+ # def convex_hull(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def delaunay_triangles(self, tolerance=0.0, only_edges=False):
- raise NotImplementedError("This method is not implemented yet.")
+ # def delaunay_triangles(self, tolerance=0.0, only_edges=False):
+ # raise NotImplementedError("This method is not implemented yet.")
- def voronoi_polygons(self, tolerance=0.0, extend_to=None,
only_edges=False):
- raise NotImplementedError("This method is not implemented yet.")
+ # def voronoi_polygons(self, tolerance=0.0, extend_to=None,
only_edges=False):
+ # raise NotImplementedError("This method is not implemented yet.")
@property
def envelope(self):
@@ -569,43 +572,43 @@ class GeoFrame(metaclass=ABCMeta):
"""
return _delegate_to_geometry_column("envelope", self)
- def minimum_rotated_rectangle(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def minimum_rotated_rectangle(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- @property
- def exterior(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # @property
+ # def exterior(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def extract_unique_points(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def extract_unique_points(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def offset_curve(self, distance, quad_segs=8, join_style="round",
mitre_limit=5.0):
- raise NotImplementedError("This method is not implemented yet.")
+ # def offset_curve(self, distance, quad_segs=8, join_style="round",
mitre_limit=5.0):
+ # raise NotImplementedError("This method is not implemented yet.")
- @property
- def interiors(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # @property
+ # def interiors(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def remove_repeated_points(self, tolerance=0.0):
- raise NotImplementedError("This method is not implemented yet.")
+ # def remove_repeated_points(self, tolerance=0.0):
+ # raise NotImplementedError("This method is not implemented yet.")
- def set_precision(self, grid_size, mode="valid_output"):
- raise NotImplementedError("This method is not implemented yet.")
+ # def set_precision(self, grid_size, mode="valid_output"):
+ # raise NotImplementedError("This method is not implemented yet.")
- def representative_point(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def representative_point(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def minimum_bounding_circle(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def minimum_bounding_circle(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def minimum_bounding_radius(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def minimum_bounding_radius(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def minimum_clearance(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def minimum_clearance(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def normalize(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def normalize(self):
+ # raise NotImplementedError("This method is not implemented yet.")
def make_valid(self, *, method="linework", keep_collapsed=True):
"""Repairs invalid geometries.
@@ -670,27 +673,27 @@ class GeoFrame(metaclass=ABCMeta):
"make_valid", self, method=method, keep_collapsed=keep_collapsed
)
- def reverse(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def reverse(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def segmentize(self, max_segment_length):
- raise NotImplementedError("This method is not implemented yet.")
+ # def segmentize(self, max_segment_length):
+ # raise NotImplementedError("This method is not implemented yet.")
- def transform(self, transformation, include_z=False):
- raise NotImplementedError("This method is not implemented yet.")
+ # def transform(self, transformation, include_z=False):
+ # raise NotImplementedError("This method is not implemented yet.")
- def force_2d(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # def force_2d(self):
+ # raise NotImplementedError("This method is not implemented yet.")
- def force_3d(self, z=0):
- raise NotImplementedError("This method is not implemented yet.")
+ # def force_3d(self, z=0):
+ # raise NotImplementedError("This method is not implemented yet.")
- def line_merge(self, directed=False):
- raise NotImplementedError("This method is not implemented yet.")
+ # def line_merge(self, directed=False):
+ # raise NotImplementedError("This method is not implemented yet.")
- @property
- def unary_union(self):
- raise NotImplementedError("This method is not implemented yet.")
+ # @property
+ # def unary_union(self):
+ # raise NotImplementedError("This method is not implemented yet.")
def union_all(self, method="unary", grid_size=None) -> BaseGeometry:
"""Returns a geometry containing the union of all geometries in the
diff --git a/python/sedona/geopandas/geodataframe.py
b/python/sedona/geopandas/geodataframe.py
index d368024c6b..d3d0d21ce1 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -213,34 +213,6 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
index : Index or array-like, optional
Index to use for the resulting frame.
- Attributes
- ----------
- geometry : GeoSeries
- The active geometry column.
- crs : pyproj.CRS
- The Coordinate Reference System (CRS) for the geometries.
- active_geometry_name : str
- Name of the active geometry column.
- area : Series
- Area of each geometry in CRS units.
- sindex : SpatialIndex
- Spatial index for the geometries.
-
- Methods
- -------
- buffer(distance)
- Buffer geometries by specified distance.
- sjoin(right, how='inner', predicate='intersects')
- Spatial join with another GeoDataFrame.
- set_geometry(col, drop=False, inplace=False)
- Set the active geometry column.
- rename_geometry(col, inplace=False)
- Rename the active geometry column.
- to_parquet(path, **kwargs)
- Save to GeoParquet format.
- copy(deep=False)
- Make a copy of the GeoDataFrame.
-
Examples
--------
>>> from shapely.geometry import Point, Polygon
@@ -871,16 +843,19 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
"""
Make a copy of this GeoDataFrame object.
- Parameters:
- - deep: bool, default False
+ Parameters
+ ----------
+ deep : bool, default False
This parameter is not supported but just a dummy parameter to
match pandas.
- Returns:
- - GeoDataFrame: A copy of this GeoDataFrame object.
+ Returns
+ -------
+ GeoDataFrame
+ A copy of this GeoDataFrame object.
- Examples:
+ Examples
+ --------
>>> from shapely.geometry import Point
- >>> import geopandas as gpd
>>> from sedona.geopandas import GeoDataFrame
>>> gdf = GeoDataFrame([{"geometry": Point(1, 1), "value1": 2,
"value2": 3}])
@@ -1028,53 +1003,49 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
) -> str:
"""
Returns a GeoJSON representation of the ``GeoDataFrame`` as a string.
+
Parameters
----------
na : {'null', 'drop', 'keep'}, default 'null'
- Indicates how to output missing (NaN) values in the GeoDataFrame.
- See below.
- show_bbox : bool, optional, default: False
- Include bbox (bounds) in the geojson
- drop_id : bool, default: False
- Whether to retain the index of the GeoDataFrame as the id property
- in the generated GeoJSON. Default is False, but may want True
- if the index is just arbitrary row numbers.
- to_wgs84: bool, optional, default: False
- If the CRS is set on the active geometry column it is exported as
- WGS84 (EPSG:4326) to meet the `2016 GeoJSON specification
- <https://tools.ietf.org/html/rfc7946>`_.
- Set to True to force re-projection and set to False to ignore CRS.
False by
- default.
- Notes
- -----
- The remaining *kwargs* are passed to json.dumps().
- Missing (NaN) values in the GeoDataFrame can be represented as follows:
- - ``null``: output the missing entries as JSON null.
- - ``drop``: remove the property from the feature. This applies to each
- feature individually so that features may have different properties.
- - ``keep``: output the missing entries as NaN.
- If the GeoDataFrame has a defined CRS, its definition will be included
- in the output unless it is equal to WGS84 (default GeoJSON CRS) or not
- possible to represent in the URN OGC format, or unless
``to_wgs84=True``
- is specified.
+ Dictates how to represent missing (NaN) values in the output.
+ - ``null``: Outputs missing entries as JSON `null`.
+ - ``drop``: Removes the entire property from a feature if its
+ value is missing.
+ - ``keep``: Outputs missing entries as ``NaN``.
+ show_bbox : bool, default False
+ If True, the `bbox` (bounds) of the geometries is included in the
+ output.
+ drop_id : bool, default False
+ If True, the GeoDataFrame index is not written to the 'id' field
+ of each GeoJSON Feature.
+ to_wgs84 : bool, default False
+ If True, all geometries are transformed to WGS84 (EPSG:4326) to
+ meet the `2016 GeoJSON specification
+ <https://tools.ietf.org/html/rfc7946>`_. When False, the current
+ CRS is exported if it's set.
+ **kwargs
+ Additional keyword arguments passed to `json.dumps()`.
+
+ Returns
+ -------
+ str
+ A GeoJSON representation of the GeoDataFrame.
+
+ See Also
+ --------
+ GeoDataFrame.to_file : Write a ``GeoDataFrame`` to a file, which can be
+ used for GeoJSON format.
+
Examples
--------
>>> from sedona.geopandas import GeoDataFrame
>>> from shapely.geometry import Point
>>> d = {'col1': ['name1', 'name2'], 'geometry': [Point(1, 2),
Point(2, 1)]}
>>> gdf = GeoDataFrame(d, crs="EPSG:3857")
- >>> gdf
- col1 geometry
- 0 name1 POINT (1 2)
- 1 name2 POINT (2 1)
>>> gdf.to_json()
- '{"type": "FeatureCollection", "features": [{"id": "0", "type":
"Feature", \
-"properties": {"col1": "name1"}, "geometry": {"type": "Point", "coordinates":
[1.0,\
- 2.0]}}, {"id": "1", "type": "Feature", "properties": {"col1": "name2"},
"geometry"\
-: {"type": "Point", "coordinates": [2.0, 1.0]}}], "crs": {"type": "name",
"properti\
-es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
- Alternatively, you can write GeoJSON to file:
- >>> gdf.to_file(path, driver="GeoJSON") # doctest: +SKIP
+ '{"type": "FeatureCollection", "features": [{"id": "0", "type":
"Feature", "properties": {"col1": "name1"}, "geometry": {"type": "Point",
"coordinates": [1.0, 2.0]}}, {"id": "1", "type": "Feature", "properties":
{"col1": "name2"}, "geometry": {"type": "Point", "coordinates": [2.0, 1.0]}}],
"crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
+
+
See also
--------
GeoDataFrame.to_file : write GeoDataFrame to file
@@ -1239,8 +1210,7 @@ es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
The type of join:
* 'left': use keys from left_df; retain only left_df geometry
column
* 'right': use keys from right_df; retain only right_df geometry
column
- * 'inner': use intersection of keys from both dfs; retain only
- left_df geometry column
+ * 'inner': use intersection of keys from both dfs; retain only
left_df geometry column
predicate : str, default 'intersects'
Binary predicate. Valid values: 'intersects', 'contains',
'within', 'dwithin'
lsuffix : str, default 'left'
@@ -1252,6 +1222,8 @@ es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
on_attribute : str, list or tuple, optional
Column name(s) to join on as an additional join restriction.
These must be found in both DataFrames.
+ **kwargs
+ Additional keyword arguments passed to the spatial join function.
Returns
-------
@@ -1262,7 +1234,7 @@ es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
--------
>>> from shapely.geometry import Point, Polygon
>>> from sedona.geopandas import GeoDataFrame
- >>>
+
>>> polygons = GeoDataFrame({
... 'geometry': [Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])],
... 'value': [1]
@@ -1295,30 +1267,32 @@ es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
def from_file(
cls, filename: str, format: str | None = None, **kwargs
) -> GeoDataFrame:
- """
- Alternate constructor to create a ``GeoDataFrame`` from a file.
+ """Alternate constructor to create a ``GeoDataFrame`` from a file.
Parameters
----------
filename : str
File path or file handle to read from. If the path is a directory,
- Sedona will read all files in the directory into a dataframe.
- format : str, default None
- The format of the file to read. If None, Sedona will infer the
format
- from the file extension. Note, inferring the format from the file
extension
- is not supported for directories.
- Options:
- - "shapefile"
- - "geojson"
- - "geopackage"
- - "geoparquet"
-
- table_name : str, default None
- The name of the table to read from a geopackage file. Required if
format is geopackage.
+ Sedona will read all files in that directory.
+ format : str, optional
+ The format of the file to read, by default None. If None, Sedona
+ infers the format from the file extension. Note that format
+ inference is not supported for directories. Available formats are
+ "shapefile", "geojson", "geopackage", and "geoparquet".
+ table_name : str, optional
+ The name of the table to read from a GeoPackage file, by default
+ None. This is required if ``format`` is "geopackage".
+ **kwargs
+ Additional keyword arguments passed to the file reader.
- See also
+ Returns
+ -------
+ GeoDataFrame
+ A new GeoDataFrame created from the file.
+
+ See Also
--------
- GeoDataFrame.to_file : write GeoDataFrame to file
+ GeoDataFrame.to_file : Write a ``GeoDataFrame`` to a file.
"""
return sgpd.io.read_file(filename, format, **kwargs)
@@ -1335,71 +1309,86 @@ es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
Parameters
----------
- path : string
+ path : str
File path or file handle to write to.
- driver : string, default None
+
+ driver : str, default None
The format driver used to write the file.
If not specified, it attempts to infer it from the file extension.
If no extension is specified, Sedona will error.
- Options:
- - "geojson"
- - "geopackage"
- - "geoparquet"
+
+ Options: "geojson", "geopackage", "geoparquet"
+
schema : dict, default None
- Not applicable to Sedona's implementation
+ Not applicable to Sedona's implementation.
+
index : bool, default None
If True, write index into one or more columns (for MultiIndex).
Default None writes the index into one or more columns only if
the index is named, is a MultiIndex, or has a non-integer data
type. If False, no index is written.
- mode : string, default 'w'
- The write mode, 'w' to overwrite the existing file and 'a' to
append.
- 'overwrite' and 'append' are equivalent to 'w' and 'a'
respectively.
- crs : pyproj.CRS, default None
- If specified, the CRS is passed to Fiona to
- better control how the file is written. If None, GeoPandas
- will determine the crs based on crs df attribute.
- The value can be anything accepted
- by :meth:`pyproj.CRS.from_user_input()
<pyproj.crs.CRS.from_user_input>`,
- such as an authority string (eg "EPSG:4326") or a WKT string.
- engine : str
- Not applicable to Sedona's implementation
- metadata : dict[str, str], default None
- Optional metadata to be stored in the file. Keys and values must be
- strings. Supported only for "GPKG" driver. Not supported by Sedona
- **kwargs :
- Keyword args to be passed to the engine, and can be used to write
- to multi-layer data, store data within archives (zip files), etc.
- In case of the "pyogrio" engine, the keyword arguments are passed
to
- `pyogrio.write_dataframe`. In case of the "fiona" engine, the
keyword
- arguments are passed to fiona.open`. For more information on
possible
- keywords, type: ``import pyogrio; help(pyogrio.write_dataframe)``.
+
+ **kwargs
+ Additional keyword arguments:
+
+ mode : str, default 'w'
+ The write mode, 'w' to overwrite the existing file and 'a' to
append.
+ 'overwrite' and 'append' are equivalent to 'w' and 'a'
respectively.
+
+ crs : pyproj.CRS, default None
+ If specified, the CRS is passed to Fiona to better control how
the file is written.
+ If None, GeoPandas will determine the CRS based on the ``crs``
attribute.
+ The value can be anything accepted by
+ :meth:`pyproj.CRS.from_user_input
<pyproj.crs.CRS.from_user_input>`,
+ such as an authority string (e.g., "EPSG:4326") or a WKT
string.
+
+ engine : str
+ Not applicable to Sedona's implementation.
+
+ metadata : dict[str, str], default None
+ Optional metadata to be stored in the file. Keys and values
must be
+ strings. Supported only for "GPKG" driver. Not supported by
Sedona.
Examples
--------
+ >>> from shapely.geometry import Point, LineString
+ >>> from sedona.geopandas import GeoDataFrame
- >>> gdf = GeoDataFrame({"geometry": [Point(0, 0), LineString([(0, 0),
(1, 1)])], "int": [1, 2]}
- >>> gdf.to_file(filepath, format="geoparquet")
+ >>> gdf = GeoDataFrame({
+ ... "geometry": [Point(0, 0), LineString([(0, 0), (1, 1)])],
+ ... "int": [1, 2]
+ ... })
+ >>> gdf.to_file(filepath, driver="geoparquet")
- With selected drivers you can also append to a file with `mode="a"`:
+ With selected drivers you can also append to a file with ``mode="a"``:
- >>> gdf.to_file(gdf, driver="geojson", mode="a")
+ >>> gdf.to_file(filepath, driver="geojson", mode="a")
- When the index is of non-integer dtype, index=None (default) is
treated as True, writing the index to the file.
+ When the index is of non-integer dtype, ``index=None`` (default) is
treated as True,
+ writing the index to the file.
>>> gdf = GeoDataFrame({"geometry": [Point(0, 0)]}, index=["a", "b"])
- >>> gdf.to_file(gdf, driver="geoparquet")
+ >>> gdf.to_file(filepath, driver="geoparquet")
"""
sgpd.io._to_file(self, path, driver, index, **kwargs)
def to_parquet(self, path, **kwargs):
"""
- Write the GeoSeries to a GeoParquet file.
- Parameters:
- - path: str
+ Write the GeoDataFrame to a GeoParquet file.
+
+ Parameters
+ ----------
+ path : str
The file path where the GeoParquet file will be written.
- - kwargs: Any
+ **kwargs
Additional arguments to pass to the Sedona DataFrame output
function.
+
+ Examples
+ --------
+ >>> from shapely.geometry import Point
+ >>> from sedona.geopandas import GeoDataFrame
+ >>> gdf = GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1)],
"value": [1, 2]})
+ >>> gdf.to_parquet("output.parquet")
"""
self.to_file(path, driver="geoparquet", **kwargs)
diff --git a/python/sedona/geopandas/geoseries.py
b/python/sedona/geopandas/geoseries.py
index 35525f98a4..e93c8a7cdd 100644
--- a/python/sedona/geopandas/geoseries.py
+++ b/python/sedona/geopandas/geoseries.py
@@ -249,36 +249,6 @@ class GeoSeries(GeoFrame, pspd.Series):
copy : bool, default False
Whether to copy the input data.
- Attributes
- ----------
- crs : pyproj.CRS
- The Coordinate Reference System (CRS) for the geometries.
- area : Series
- Area of each geometry in CRS units.
- length : Series
- Length/perimeter of each geometry in CRS units.
- bounds : DataFrame
- Bounding box coordinates for each geometry.
- geometry : GeoSeries
- The geometry column (returns self).
- sindex : SpatialIndex
- Spatial index for the geometries.
-
- Methods
- -------
- buffer(distance)
- Buffer geometries by specified distance.
- intersection(other)
- Compute intersection with other geometries.
- intersects(other)
- Test if geometries intersect with other geometries.
- to_geopandas()
- Convert to GeoPandas GeoSeries.
- to_crs(crs)
- Transform geometries to a different CRS.
- set_crs(crs)
- Set the CRS without transforming geometries.
-
Examples
--------
>>> from shapely.geometry import Point, Polygon
@@ -789,20 +759,23 @@ class GeoSeries(GeoFrame, pspd.Series):
return SpatialIndex(self._internal.spark_frame,
column_name=geometry_column)
def copy(self, deep=False):
- """
- Make a copy of this GeoSeries object.
+ """Make a copy of this GeoSeries object.
- Parameters:
- - deep: bool, default False
- If True, a deep copy of the data is made. Otherwise, a shallow
copy is made.
+ Parameters
+ ----------
+ deep : bool, default False
+ If True, a deep copy of the data is made. Otherwise, a shallow
+ copy is made.
- Returns:
- - GeoSeries: A copy of this GeoSeries object.
+ Returns
+ -------
+ GeoSeries
+ A copy of this GeoSeries object.
- Examples:
+ Examples
+ --------
>>> from shapely.geometry import Point
>>> from sedona.geopandas import GeoSeries
-
>>> gs = GeoSeries([Point(1, 1), Point(2, 2)])
>>> gs_copy = gs.copy()
>>> print(gs_copy)
@@ -1496,26 +1469,32 @@ class GeoSeries(GeoFrame, pspd.Series):
on_attribute=None,
**kwargs,
):
- """
- Perform a spatial join between two GeoSeries.
- Parameters:
- - other: GeoSeries
- - how: str, default 'inner'
+ """Perform a spatial join between two GeoSeries.
+
+ Parameters
+ ----------
+ other : GeoSeries
+ The GeoSeries to join with.
+ how : str, default 'inner'
The type of join to perform.
- - predicate: str, default 'intersects'
+ predicate : str, default 'intersects'
The spatial predicate to use for the join.
- - lsuffix: str, default 'left'
+ lsuffix : str, default 'left'
Suffix to apply to the left GeoSeries' column names.
- - rsuffix: str, default 'right'
+ rsuffix : str, default 'right'
Suffix to apply to the right GeoSeries' column names.
- - distance: float, optional
+ distance : float, optional
The distance threshold for the join.
- - on_attribute: str, optional
+ on_attribute : str, optional
The attribute to join on.
- - kwargs: Any
+ **kwargs
Additional arguments to pass to the join function.
- Returns:
- - GeoSeries
+
+ Returns
+ -------
+ GeoSeries
+ A new GeoSeries containing the result of the spatial join.
+
"""
from sedona.geopandas import sjoin
@@ -1654,30 +1633,25 @@ class GeoSeries(GeoFrame, pspd.Series):
def from_file(
cls, filename: str, format: Union[str, None] = None, **kwargs
) -> "GeoSeries":
- """
- Alternate constructor to create a ``GeoDataFrame`` from a file.
+ """Alternate constructor to create a ``GeoSeries`` from a file.
Parameters
----------
filename : str
File path or file handle to read from. If the path is a directory,
- Sedona will read all files in the directory into a dataframe.
- format : str, default None
- The format of the file to read. If None, Sedona will infer the
format
- from the file extension. Note, inferring the format from the file
extension
- is not supported for directories.
- Options:
- - "shapefile"
- - "geojson"
- - "geopackage"
- - "geoparquet"
-
- table_name : str, default None
- The name of the table to read from a geopackage file. Required if
format is geopackage.
+ Sedona will read all files in that directory.
+ format : str, optional
+ The format of the file to read, by default None. If None, Sedona
+ infers the format from the file extension. Note that format
+ inference is not supported for directories. Available formats are
+ "shapefile", "geojson", "geopackage", and "geoparquet".
+ table_name : str, optional
+ The name of the table to read from a GeoPackage file, by default
+ None. This is required if ``format`` is "geopackage".
- See also
+ See Also
--------
- GeoDataFrame.to_file : write GeoDataFrame to file
+ GeoDataFrame.to_file : Write a ``GeoDataFrame`` to a file.
"""
df = sgpd.io.read_file(filename, format, **kwargs)
return GeoSeries(df.geometry, crs=df.crs)
@@ -2770,76 +2744,77 @@ e": "Feature", "properties": {}, "geometry": {"type": "Point", "coordinates": [3
index: Union[bool, None] = None,
**kwargs,
):
- """
- Write the ``GeoSeries`` to a file.
+ """Write the ``GeoSeries`` to a file.
Parameters
----------
- path : string
+ path : str
File path or file handle to write to.
- driver : string, default None
- The format driver used to write the file.
- If not specified, it attempts to infer it from the file extension.
- If no extension is specified, Sedona will error.
- Options:
- - "geojson"
- - "geopackage"
- - "geoparquet"
- schema : dict, default None
- Not applicable to Sedona's implementation
- index : bool, default None
- If True, write index into one or more columns (for MultiIndex).
- Default None writes the index into one or more columns only if
- the index is named, is a MultiIndex, or has a non-integer data
- type. If False, no index is written.
- mode : string, default 'w'
- The write mode, 'w' to overwrite the existing file and 'a' to append.
- 'overwrite' and 'append' are equivalent to 'w' and 'a' respectively.
- crs : pyproj.CRS, default None
- If specified, the CRS is passed to Fiona to
- better control how the file is written. If None, GeoPandas
- will determine the crs based on crs df attribute.
- The value can be anything accepted
- by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
- such as an authority string (eg "EPSG:4326") or a WKT string.
- engine : str
- Not applicable to Sedona's implementation
- metadata : dict[str, str], default None
- Optional metadata to be stored in the file. Keys and values must be
- strings. Supported only for "GPKG" driver. Not supported by Sedona
- **kwargs :
- Keyword args to be passed to the engine, and can be used to write
- to multi-layer data, store data within archives (zip files), etc.
- In case of the "pyogrio" engine, the keyword arguments are passed to
- `pyogrio.write_dataframe`. In case of the "fiona" engine, the keyword
- arguments are passed to fiona.open`. For more information on possible
- keywords, type: ``import pyogrio; help(pyogrio.write_dataframe)``.
+ driver : str, optional
+ The format driver used to write the file, by default None. If not
+ specified, it's inferred from the file extension. Available formats
+ are "geojson", "geopackage", and "geoparquet".
+ index : bool, optional
+ If True, writes the index as a column. If False, no index is
+ written. By default None, the index is written only if it is named,
+ is a MultiIndex, or has a non-integer data type.
+ mode : str, default 'w'
+ The write mode: 'w' to overwrite the existing file or 'a' to append.
+ crs : pyproj.CRS, optional
+ The coordinate reference system to write. If None, it is determined
+ from the ``GeoSeries`` `crs` attribute. The value can be anything
+ accepted by :meth:`pyproj.CRS.from_user_input()`, such as an
+ authority string (e.g., "EPSG:4326") or a WKT string.
+ **kwargs
+ Additional keyword arguments passed to the underlying writing engine.
Examples
--------
+ >>> from shapely.geometry import Point, LineString
+ >>> from sedona.geopandas import GeoSeries
+ >>> # Note: Examples write to temporary files for demonstration
+ >>> import tempfile
+ >>> import os
+
+ Create a GeoSeries:
+ >>> gs = GeoSeries(
+ ... [Point(0, 0), LineString([(1, 1), (2, 2)])],
+ ... index=["a", "b"]
+ ... )
- >>> gdf = GeoDataFrame({"geometry": [Point(0, 0), LineString([(0, 0), (1, 1)])], "int": [1, 2]}
- >>> gdf.to_file(filepath, format="geoparquet")
-
- With selected drivers you can also append to a file with `mode="a"`:
-
- >>> gdf.to_file(gdf, driver="geojson", mode="a")
-
- When the index is of non-integer dtype, index=None (default) is treated as True, writing the index to the file.
+ Save to a GeoParquet file:
+ >>> path_parquet = os.path.join(tempfile.gettempdir(), "data.parquet")
+ >>> gs.to_file(path_parquet, driver="geoparquet")
- >>> gdf = GeoDataFrame({"geometry": [Point(0, 0)]}, index=["a", "b"])
- >>> gdf.to_file(gdf, driver="geoparquet")
+ Append to a GeoJSON file:
+ >>> path_json = os.path.join(tempfile.gettempdir(), "data.json")
+ >>> gs.to_file(path_json, driver="geojson", mode='a')
"""
self.to_geoframe().to_file(path, driver, index=index, **kwargs)
def to_parquet(self, path, **kwargs):
- """
- Write the GeoSeries to a GeoParquet file.
- Parameters:
- - path: str
+ """Write the GeoSeries to a GeoParquet file.
+
+ Parameters
+ ----------
+ path : str
The file path where the GeoParquet file will be written.
- - kwargs: Any
- Additional arguments to pass to the Sedona DataFrame output function.
+ **kwargs
+ Additional keyword arguments passed to the underlying writing function.
+
+ Returns
+ -------
+ None
+
+ Examples
+ --------
+ >>> from shapely.geometry import Point
+ >>> from sedona.geopandas import GeoSeries
+ >>> import tempfile
+ >>> import os
+ >>> gs = GeoSeries([Point(1, 1), Point(2, 2)])
+ >>> file_path = os.path.join(tempfile.gettempdir(), "my_geodata.parquet")
+ >>> gs.to_parquet(file_path)
"""
self.to_geoframe().to_file(path, driver="geoparquet", **kwargs)
diff --git a/python/sedona/geopandas/internal.py b/python/sedona/geopandas/internal.py
deleted file mode 100644
index e2d62a13b1..0000000000
--- a/python/sedona/geopandas/internal.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import pandas as pd
-from pyspark._typing import F
-from pyspark.pandas.internal import InternalFrame as InternalPySparkFrame
-
-
-class InternalGeoFrame(InternalPySparkFrame):
-
- @staticmethod
- def from_pandas(pdf: pd.DataFrame) -> "InternalGeoFrame":
- internal_frame = InternalPySparkFrame.from_pandas(pdf)
- sdf = internal_frame.spark_frame.withColumn("geometry", F.lit(None))
- return InternalGeoFrame(
- spark_frame=sdf,
- index_spark_columns=internal_frame.index_spark_columns,
- data_spark_columns=internal_frame.data_spark_columns,
- )
diff --git a/python/sedona/geopandas/tools/sjoin.py b/python/sedona/geopandas/tools/sjoin.py
index f2526018e0..2b9ce1923c 100644
--- a/python/sedona/geopandas/tools/sjoin.py
+++ b/python/sedona/geopandas/tools/sjoin.py
@@ -227,9 +227,6 @@ def sjoin(
):
"""Spatial join of two GeoDataFrames.
- See the User Guide page :doc:`../../user_guide/mergingdata` for details.
-
-
Parameters
----------
left_df, right_df : GeoDataFrames