MrPowers commented on code in PR #81:
URL: https://github.com/apache/sedona-db/pull/81#discussion_r2359444959
##########
docs/crs-examples.ipynb:
##########
@@ -0,0 +1,446 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "91910e50-a5ae-4d5a-a431-62ac5fbc11ca",
+ "metadata": {},
+ "source": [
+ "# CRS Examples\n",
+ "\n",
+ "This example demonstrates how one table with a EPSG 4326 CRS cannot be
joined with another table that uses EPSG 3857."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "be8ffe47-dc89-4842-bb1e-1e8640afffc3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sedonadb\n",
+ "\n",
+ "sd = sedonadb.connect()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "54b48173-6be0-4827-ac42-1439eb31e9f7",
+ "metadata": {},
+ "source": [
+ "Read a table with a geometry column that uses EPSG 4326.\n",
+ "\n",
+ "Note how SedonaDB reads the CRS specified in the Parquet file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "143f00d5-6878-4dab-a82c-c9fb4dbfaf00",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "countries = sd.read_parquet(\n",
+ "
\"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "8ef94b7b-b65d-4da5-9443-3253e84e2e7f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "SedonaSchema with 3 fields:\n",
+ " name: Utf8View\n",
+ " continent: Utf8View\n",
+ " geometry: wkb_view <epsg:4326>"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "countries.schema"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "12d94c4f-5e7f-47c6-b5cc-3a3363bbc290",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cities = sd.sql(\"\"\"\n",
+ "SELECT city, ST_SetSRID(ST_GeomFromText(wkt), 3857) AS geometry FROM
(VALUES\n",
+ " ('New York', 'POINT(-8238310.24 4969803.34)'),\n",
+ " ('Los Angeles', 'POINT(-13153204.78 4037636.04)'),\n",
+ " ('Chicago', 'POINT(-9757148.04 5138517.44)'))\n",
+ "AS t(city, wkt)\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "36e53438-c2d2-444e-9f34-d391f0f3f588",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "SedonaSchema with 2 fields:\n",
+ " city: Utf8\n",
+ " geometry: wkb <epsg:3857>"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cities.schema"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "62c87571-50aa-4f57-a7dd-4afa3210320a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cities.to_view(\"cities\", overwrite=True)\n",
+ "countries.to_view(\"countries\", overwrite=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "561b3c8c-4952-4fa7-9fe1-3fa0522b0d9f",
+ "metadata": {},
+ "source": [
+ "## Join with mismatched CRSs\n",
+ "\n",
+ "The cities and countries tables have different CRSs.\n",
+ "\n",
+ "The cities table uses EPSG:3857 and the countries table uses
EPSG:4326.\n",
+ "\n",
+ "Let's confirm that the code errors out if we try to join the mismatched
tables."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "906bad37-4f3f-4028-82b4-487fabe5957f",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "SedonaError",
+ "evalue": "type_coercion\ncaused by\nError during planning: Mismatched
CRS arguments: epsg:3857 vs epsg:4326\nUse ST_Transform() or ST_SetSRID() to
ensure arguments are compatible.",
+ "output_type": "error",
+ "traceback": [
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mSedonaError\u001b[0m
Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[7], line 6\u001b[0m\n\u001b[1;32m 1\u001b[0m
\u001b[38;5;66;03m# join doesn't work when CRSs don't
match\u001b[39;00m\n\u001b[1;32m 2\u001b[0m
\u001b[43msd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msql\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\"\"\u001b[39;49m\n\u001b[1;32m
3\u001b[0m \u001b[38;5;124;43mselect * from
cities\u001b[39;49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124;43mjoin
countries\u001b[39;49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124;43mwhere
ST_Intersects(cities.geometry,
countries.geometry)\u001b[39;49m\n\u001b[0;32m----> 6\u001b[0m
\u001b[38;5;124;43m\"\"\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshow\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File
\u001b[0;32m/opt/miniconda3/lib/python3.12/site-packages/sedonadb/dataframe.py:297\u001b[0m,
in \u001b[0;36mDataFrame.show\u001b[0;34m(self, limit, width,
ascii)\u001b[0m\n\u001b[1;32m 272\u001b[0m
\u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Print the first limit rows
to the console\u001b[39;00m\n\u001b[1;32m 273\u001b[0m \n\u001b[1;32m
274\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m
(...)\u001b[0m\n\u001b[1;32m 294\u001b[0m \n\u001b[1;32m 295\u001b[0m
\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 296\u001b[0m width
\u001b[38;5;241m=\u001b[39m _out_width(width)\n\u001b[0;32m--> 297\u001b[0m
\u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_impl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshow\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_ctx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m
\u001b[49m
\u001b[43mlimit\u001b[49m\u001b[43m,\u001b[49m\u001b[43m
\u001b[49m\u001b[43mwidth\u001b[49m\u001b[43m,\u001b[49m\u001b[43m
\u001b[49m\u001b[43mascii\u001b[49m\u001b[43m)\u001b[49m,
end\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+ "\u001b[0;31mSedonaError\u001b[0m: type_coercion\ncaused by\nError
during planning: Mismatched CRS arguments: epsg:3857 vs epsg:4326\nUse
ST_Transform() or ST_SetSRID() to ensure arguments are compatible."
+ ]
+ }
+ ],
+ "source": [
+ "# join doesn't work when CRSs don't match\n",
+ "sd.sql(\"\"\"\n",
+ "select * from cities\n",
+ "join countries\n",
+ "where ST_Intersects(cities.geometry, countries.geometry)\n",
+ "\"\"\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "41e6f59f-5217-40b2-b05a-9c95eae29df8",
+ "metadata": {},
+ "source": [
+ "## Convert CRS and then join\n",
+ "\n",
+ "Let's convert the cities table to use EPSG:4326 and then perform the join
with the two tables once they have matching CRSs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "122857c1-f68d-4037-9787-54c20706e60f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# update cities to use 4326\n",
+ "cities = sd.sql(\"\"\"\n",
+ "SELECT city, ST_Transform(geometry, 'EPSG:4326') as geometry\n",
+ "FROM cities\n",
+ "\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "947e085c-62a4-4315-b155-007a95156964",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "SedonaSchema with 2 fields:\n",
+ " city: Utf8\n",
+ " geometry: wkb <ogc:crs84>"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cities.schema"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "3bcbaf7a-ec40-4b7e-85c2-db5ef1e3232e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cities.to_view(\"cities\", overwrite=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "819c8d04-fa03-4ef8-aecb-e79d48f0b820",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+
"┌─────────────┬──────────────────────┬──────────────────────┬───────────────┬──────────────────────┐\n",
+ "│ city ┆ geometry ┆ name ┆
continent ┆ geometry │\n",
+ "│ utf8 ┆ geometry ┆ utf8view ┆
utf8view ┆ geometry │\n",
+
"╞═════════════╪══════════════════════╪══════════════════════╪═══════════════╪══════════════════════╡\n",
+ "│ New York ┆ POINT(-74.006000039… ┆ United States of Am… ┆ North
America ┆ MULTIPOLYGON(((-122… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Los Angeles ┆ POINT(-118.15724889… ┆ United States of Am… ┆ North
America ┆ MULTIPOLYGON(((-122… │\n",
+
"├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
+ "│ Chicago ┆ POINT(-87.649952137… ┆ United States of Am… ┆ North
America ┆ MULTIPOLYGON(((-122… │\n",
+
"└─────────────┴──────────────────────┴──────────────────────┴───────────────┴──────────────────────┘\n"
+ ]
+ }
+ ],
+ "source": [
+ "# join works when CRSs match\n",
+ "sd.sql(\"\"\"\n",
+ "select * from cities\n",
+ "join countries\n",
+ "where ST_Intersects(cities.geometry, countries.geometry)\n",
+ "\"\"\").show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5279bebd-1d8d-4f33-bcd9-2c1e93ff7221",
+ "metadata": {},
+ "source": [
+ "## Example #2: Joining two tables with different CRSs"
Review Comment:
Not relevant anymore, but smart observation
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]