This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 4438ed2 Add geopandas interop (#41)
4438ed2 is described below
commit 4438ed2e8a1f1f059f97f24c7f74a6b513d2f407
Author: Matthew Powers <[email protected]>
AuthorDate: Wed Sep 10 15:11:22 2025 -0400
Add geopandas interop (#41)
---
README.md | 4 +-
docs/geopandas-interop.ipynb | 269 +++++++++++++++++++++++++++++++++++++++++++
docs/some_data.json | 33 ++++++
3 files changed, 304 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index c61d0bc..797d257 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ SedonaDB only runs on a single machine, so it’s perfect for
processing smaller
## Install
-You can install Python SedonaDB with `pip install apache-sedona`.
+You can install Python SedonaDB with `pip install "apache-sedona[db]"`.
## Overture buildings example
@@ -37,7 +37,7 @@ Start by establishing a connection:
```python
import sedonadb
-sedona = sedonadb.connect()
+sd = sedonadb.connect()
```
Set some AWS environment variables to access the data:
diff --git a/docs/geopandas-interop.ipynb b/docs/geopandas-interop.ipynb
new file mode 100644
index 0000000..ce11116
--- /dev/null
+++ b/docs/geopandas-interop.ipynb
@@ -0,0 +1,269 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "caa5f2d5-28bb-4ce9-8a11-92646b3a9f6c",
+ "metadata": {},
+ "source": [
+ "# GeoPandas interoperability\n",
+ "\n",
+ "This example shows how to read a GeoJSON file with GeoPandas and then
convert the resulting GeoDataFrame to a SedonaDB DataFrame.\n",
+ "\n",
+ "Any file type that can be read by GeoPandas can also be read into a
SedonaDB DataFrame!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "0434bead-2628-4844-a3f6-2f9c15a21899",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sedonadb\n",
+ "import geopandas as gpd\n",
+ "\n",
+ "sd = sedonadb.connect()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "618b5d1d-ac2b-4786-ae5b-0d10efd6a8d4",
+ "metadata": {},
+ "source": [
+ "## Read a GeoJSON file with GeoPandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "2691bd24-9b2d-4cf9-958d-4ef01d967cb3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "gdf = gpd.read_file(\"some_data.json\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "cd367a73-acd3-41cf-b892-7d863c370d5f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>prop0</th>\n",
+ " <th>prop1</th>\n",
+ " <th>geometry</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>value0</td>\n",
+ " <td>None</td>\n",
+ " <td>POINT (102 0.5)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>value1</td>\n",
+ " <td>0.0</td>\n",
+ " <td>LINESTRING (102 0, 103 1, 104 0, 105 1)</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>value2</td>\n",
+ " <td>{ \"this\": \"that\" }</td>\n",
+ " <td>POLYGON ((100 0, 101 0, 101 1, 100 1, 100 0))</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " prop0 prop1
geometry\n",
+ "0 value0 None POINT
(102 0.5)\n",
+ "1 value1 0.0 LINESTRING (102 0, 103 1, 104 0,
105 1)\n",
+ "2 value2 { \"this\": \"that\" } POLYGON ((100 0, 101 0, 101 1, 100
1, 100 0))"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdf"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "454e08a3-de65-4151-9d29-5d5ee8cf31d3",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<class 'geopandas.geodataframe.GeoDataFrame'>\n",
+ "RangeIndex: 3 entries, 0 to 2\n",
+ "Data columns (total 3 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 prop0 3 non-null object \n",
+ " 1 prop1 2 non-null object \n",
+ " 2 geometry 3 non-null geometry\n",
+ "dtypes: geometry(1), object(2)\n",
+ "memory usage: 204.0+ bytes\n"
+ ]
+ }
+ ],
+ "source": [
+ "gdf.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a5837268-1620-4b2b-bf37-cb6e282daedf",
+ "metadata": {},
+ "source": [
+ "### Convert the GeoPandas DataFrame to a SedonaDB DataFrame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "385f6333-411d-4d1f-a09b-8816cccceabc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = sd.create_data_frame(gdf)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "186059ae-4cf8-48ec-878a-71e7a39ac07e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+
"┌────────┬────────────────────┬──────────────────────────────────────────┐\n",
+ "│ prop0 ┆ prop1 ┆ geometry
│\n",
+ "│ utf8 ┆ utf8 ┆ geometry
│\n",
+
"╞════════╪════════════════════╪══════════════════════════════════════════╡\n",
+ "│ value0 ┆ ┆ POINT(102 0.5)
│\n",
+
"├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ value1 ┆ 0.0 ┆ LINESTRING(102 0,103 1,104 0,105 1)
│\n",
+
"├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ value2 ┆ { \"this\": \"that\" } ┆ POLYGON((100 0,101 0,101 1,100
1,100 0)) │\n",
+
"└────────┴────────────────────┴──────────────────────────────────────────┘\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2f09cbbe-86b5-4eb4-b920-1b12f018d1a6",
+ "metadata": {},
+ "source": [
+ "## Read a FlatGeobuf file\n",
+ "\n",
+ "This code demonstrates how to read a remote FlatGeobuf file with GeoPandas and
then convert the resulting GeoDataFrame to a SedonaDB DataFrame."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "965ae9f3-293b-4e8e-92bf-1359a482bca3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path =
\"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities.fgb\"\n",
+ "gdf = gpd.read_file(path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "372c937f-da36-4e4b-98da-347890318a80",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = sd.create_data_frame(gdf)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "d99f4474-da3a-4834-9675-184a667b2a90",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "┌──────────────┬──────────────────────────────┐\n",
+ "│ name ┆ geometry │\n",
+ "│ utf8 ┆ geometry │\n",
+ "╞══════════════╪══════════════════════════════╡\n",
+ "│ Vatican City ┆ POINT(12.4533865 41.9032822) │\n",
+ "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ San Marino ┆ POINT(12.4417702 43.9360958) │\n",
+ "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Vaduz ┆ POINT(9.5166695 47.1337238) │\n",
+ "└──────────────┴──────────────────────────────┘\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.show(3)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/some_data.json b/docs/some_data.json
new file mode 100644
index 0000000..77804aa
--- /dev/null
+++ b/docs/some_data.json
@@ -0,0 +1,33 @@
+{ "type": "FeatureCollection",
+ "features": [
+ { "type": "Feature",
+ "geometry": {"type": "Point", "coordinates": [102.0, 0.5]},
+ "properties": {"prop0": "value0"}
+ },
+ { "type": "Feature",
+ "geometry": {
+ "type": "LineString",
+ "coordinates": [
+ [102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]
+ ]
+ },
+ "properties": {
+ "prop0": "value1",
+ "prop1": 0.0
+ }
+ },
+ { "type": "Feature",
+ "geometry": {
+ "type": "Polygon",
+ "coordinates": [
+ [ [100.0, 0.0], [101.0, 0.0], [101.0, 1.0],
+ [100.0, 1.0], [100.0, 0.0] ]
+ ]
+ },
+ "properties": {
+ "prop0": "value2",
+ "prop1": {"this": "that"}
+ }
+ }
+ ]
+}