This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new f39dc5850e [CI] pre-commit autoupdate; run pre-commit/black (#2619)
f39dc5850e is described below
commit f39dc5850ee66bbf1cf987b4f814d5acec626b4f
Author: John Bampton <[email protected]>
AuthorDate: Sun Feb 8 16:00:14 2026 +1000
[CI] pre-commit autoupdate; run pre-commit/black (#2619)
---
.pre-commit-config.yaml | 24 +++++-----
docs/blog/posts/h3.md | 54 ++++++++--------------
docs/blog/posts/intro-sedonadb-0-2.md | 12 ++---
docs/blog/posts/intro-sedonadb.md | 18 +++-----
docs/sedonaspark.md | 6 +--
docs/setup/fabric.md | 1 -
docs/tutorial/concepts/spatial-joins.md | 6 +--
docs/tutorial/files/geoparquet-sedona-spark.md | 6 +--
docs/usecases/ApacheSedonaRaster.ipynb | 10 ++--
docs/usecases/ApacheSedonaSQL.ipynb | 12 ++---
.../contrib/PostgresqlConnectionApacheSedona.ipynb | 1 -
python/sedona/spark/geopandas/base.py | 1 +
python/sedona/spark/geopandas/geoseries.py | 1 -
python/sedona/spark/raster/sedona_raster.py | 1 -
python/sedona/spark/sql/functions.py | 1 -
python/sedona/spark/stats/clustering/dbscan.py | 1 +
.../outlier_detection/local_outlier_factor.py | 1 +
python/tests/flink/conftest.py | 1 -
python/tests/geopandas/test_geodataframe.py | 4 +-
python/tests/serialization/test_deserializers.py | 6 +--
python/tests/spatial_rdd/test_rectangle_rdd.py | 1 -
python/tests/sql/test_aggregate_functions.py | 42 ++++++-----------
python/tests/sql/test_function.py | 18 +++-----
23 files changed, 84 insertions(+), 144 deletions(-)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0eaa04463f..aa2d66af96 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -20,14 +20,14 @@
default_stages: [pre-commit, pre-push]
default_language_version:
python: python3
- node: 24.12.0
+ node: 24.13.0
minimum_pre_commit_version: '3.2.0'
repos:
- repo: meta
hooks:
- id: identity
name: run identity check
- description: check you have set your git identity
+ description: a simple hook which prints all arguments passed to it, useful for debugging
- id: check-hooks-apply
name: run check hooks apply
description: check that all the hooks apply to the repository
@@ -46,7 +46,7 @@ repos:
entry: prettier --write '**/*.js' '**/*.yaml' '**/*.yml'
files: \.(js|ya?ml)$
language: node
- additional_dependencies: ['[email protected]']
+ additional_dependencies: ['[email protected]']
pass_filenames: false
- id: maven-spotless-apply
name: maven spotless apply
@@ -79,7 +79,7 @@ repos:
types: [file] # Ensure only regular files are passed, not directories
stages: [manual]
- repo: https://github.com/Lucas-C/pre-commit-hooks
- rev: v1.5.5
+ rev: v1.5.6
hooks:
- id: chmod
name: set file permissions
@@ -89,6 +89,7 @@ repos:
stages: [manual]
- id: insert-license
name: add license for all Batch files
+ description: automatically adds a licence header to all Batch files that don't have a license header
files: \.bat$
args:
- --comment-style
@@ -195,6 +196,7 @@ repos:
- --fuzzy-match-generates-todo
- id: insert-license
name: add license for all XML files
+ description: automatically adds a licence header to all XML files that don't have a license header
files: \.xml$
exclude: |
(?x)(
@@ -267,13 +269,13 @@ repos:
description: a tool (and pre-commit hook) to automatically upgrade syntax for newer versions of the language
args: [--py37-plus]
- repo: https://github.com/psf/black-pre-commit-mirror
- rev: 25.12.0
+ rev: 26.1.0
hooks:
- id: black-jupyter
name: run black-jupyter
description: format Python files and Jupyter Notebooks with black
- repo: https://github.com/pre-commit/mirrors-clang-format
- rev: v21.1.7
+ rev: v21.1.8
hooks:
- id: clang-format
name: run clang-format
@@ -281,7 +283,7 @@ repos:
args: [--style=file:.github/linters/.clang-format]
types_or: [c]
- repo: https://github.com/PyCQA/bandit
- rev: 1.9.2
+ rev: 1.9.3
hooks:
- id: bandit
name: run bandit
@@ -312,7 +314,7 @@ repos:
name: run gitleaks
description: check for secrets with gitleaks
- repo: https://github.com/zizmorcore/zizmor-pre-commit
- rev: v1.20.0
+ rev: v1.22.0
hooks:
- id: zizmor
name: run zizmor
@@ -464,7 +466,7 @@ repos:
name: run shellcheck
description: check Shell scripts with shellcheck
- repo: https://github.com/adrienverge/yamllint
- rev: v1.37.1
+ rev: v1.38.0
hooks:
- id: yamllint
name: run yamllint
@@ -473,7 +475,7 @@ repos:
types: [yaml]
files: \.ya?ml$
- repo: https://github.com/oxipng/oxipng
- rev: v10.0.0
+ rev: v10.1.0
hooks:
- id: oxipng
name: run oxipng
@@ -486,4 +488,4 @@ repos:
- id: blacken-docs
name: run blacken-docs
description: run `black` on python code blocks in documentation files
- additional_dependencies: [black==25.12.0]
+ additional_dependencies: [black==26.1.0]
diff --git a/docs/blog/posts/h3.md b/docs/blog/posts/h3.md
index 0bb289280d..de63e305ee 100644
--- a/docs/blog/posts/h3.md
+++ b/docs/blog/posts/h3.md
@@ -170,13 +170,11 @@ As many of the geometries in this data set are particularly complex and have a h
```py
# Pull the single area to show on the map
-area = sedona.sql(
- """
+area = sedona.sql("""
select *
from fema
where FLD_AR_ID = '48201C_9129'
-"""
-)
+""")
```
{ align=center width="80%" }
@@ -213,16 +211,14 @@ when transforming a polygon to H3 cells. There are two methods for doing this:
Here we can test that out for level 8 and the cover method:
```py
-h3_cells_8 = sedona.sql(
- """with a as (
+h3_cells_8 = sedona.sql("""with a as (
select
explode(ST_H3CellIDs(geometry, 8, true)) as hex_id
from fema
where FLD_AR_ID = '48201C_9129')
select h3_int_to_str(hex_id) as hex_id from a
-"""
-)
+""")
```
{ align=center width="80%" }
@@ -232,25 +228,21 @@ As you can see there is a high amount of extra coverage at this level. Let's go
```py
# Test the same overlap but with H3 Size 12
-h3_cells_12_cover = sedona.sql(
- """with a as (
+h3_cells_12_cover = sedona.sql("""with a as (
select
explode(ST_H3CellIDs(geometry, 12, true)) as hex_id
from fema
where FLD_AR_ID = '48201C_9129')
select h3_int_to_str(hex_id) as hex_id from a
-"""
-)
+""")
-h3_cells_12_fill = sedona.sql(
- """with a as (
+h3_cells_12_fill = sedona.sql("""with a as (
select
explode(ST_H3CellIDs(geometry, 12, false)) as hex_id
from fema
where FLD_AR_ID = '48201C_9129')
select h3_int_to_str(hex_id) as hex_id from a
-"""
-)
+""")
```

@@ -261,8 +253,7 @@ We can also calculate the coverage overlaps. First for the level 8 layer:
```py
# Find the excess coverage area for H3 coverage level 8
-h3_cells_missing_8 = sedona.sql(
- """with a as (
+h3_cells_missing_8 = sedona.sql("""with a as (
select
explode(ST_H3ToGeom(ST_H3CellIDs(geometry, 8, true))) as h3_geom
from fema
@@ -274,8 +265,7 @@ from a
join fema
on st_intersects(h3_geom, geometry)
where FLD_AR_ID = '48201C_9129'
-"""
-)
+""")
h3_cells_missing_8.show()
```
@@ -295,8 +285,7 @@ We can do the same for the level 12 fill layer which is a bit better:
```py
# Find the percent of missing area with H3 coverage level 12
-h3_cells_missing = sedona.sql(
- """with a as (
+h3_cells_missing = sedona.sql("""with a as (
select
explode(ST_H3ToGeom(ST_H3CellIDs(geometry, 12, false))) as h3_geom
from fema
@@ -308,8 +297,7 @@ from a
join fema
on st_intersects(h3_geom, geometry)
where FLD_AR_ID = '48201C_9129'
-"""
-)
+""")
h3_cells_missing.show()
```
@@ -333,15 +321,13 @@ First let's look at a baseline joining the original geometry to the buildings:
```py
# Compare a spatial join at H3 level 8 with Overture Map Buildings
-true_spatial_join = sedona.sql(
- """
+true_spatial_join = sedona.sql("""
select count(overture.id) as buildings
from overture.buildings_building overture
join fema
on st_intersects(fema.geometry, overture.geometry)
where fema.FLD_AR_ID = '48201C_9129'
-"""
-)
+""")
true_spatial_join.show()
```
@@ -361,8 +347,7 @@ Let's test that with the level 8 H3 cells. Note that you must first transform th
```py
# Compare a spatial join at H3 level 8 with Overture Map Buildings
-h3_cells_join_level_8 = sedona.sql(
- """with a as (
+h3_cells_join_level_8 = sedona.sql("""with a as (
select
explode(
ST_H3ToGeom(
@@ -379,8 +364,7 @@ select count(overture.id) as buildings
from overture.buildings_building overture
join b
on st_intersects(b.h3_geom, overture.geometry)
-"""
-)
+""")
h3_cells_join_level_8.show()
```
@@ -396,8 +380,7 @@ h3_cells_join_level_8.show()
Far more than we would want in a real world analysis. Now let's try with our level 12 H3 cells using the fill methodology which has the least overlap.
```py
-h3_cells_join_level_12 = sedona.sql(
- """with a as (
+h3_cells_join_level_12 = sedona.sql("""with a as (
select
explode(
ST_H3ToGeom(
@@ -413,8 +396,7 @@ select count(overture.id) as buildings
from overture.buildings_building overture
join b
on st_intersects(b.h3_geom, overture.geometry)
-"""
-)
+""")
h3_cells_join_level_12.show()
```
diff --git a/docs/blog/posts/intro-sedonadb-0-2.md b/docs/blog/posts/intro-sedonadb-0-2.md
index 8006bc5092..d14c59e0c2 100644
--- a/docs/blog/posts/intro-sedonadb-0-2.md
+++ b/docs/blog/posts/intro-sedonadb-0-2.md
@@ -82,12 +82,10 @@ url = "https://flatgeobuf.septima.dk/population_areas.fgb"
sd.read_pyogrio(url).to_view("population_areas")
wkt = "POLYGON ((-73.978329 40.767412, -73.950005 40.767412, -73.950005
40.795098, -73.978329 40.795098, -73.978329 40.767412))"
-sd.sql(
- f"""
+sd.sql(f"""
SELECT sum(population::INTEGER) FROM population_areas
WHERE ST_Intersects(wkb_geometry, ST_SetSRID(ST_GeomFromWKT('{wkt}'), 4326))
-"""
-).show()
+""").show()
# > ┌──────────────────────────────────┐
# > │ sum(population_areas.population) │
# > │ int64 │
@@ -117,12 +115,10 @@ sd.sql("SET datafusion.execution.parquet.max_row_group_size = 100000")
sd.read_parquet(url).to_view("water_point")
-sd.sql(
- """
+sd.sql("""
SELECT * FROM water_point
ORDER BY sd_order(geometry)
-"""
-).to_parquet("water_point.parquet", geoparquet_version="1.1")
+""").to_parquet("water_point.parquet", geoparquet_version="1.1")
```
## Python User-Defined Function Support
diff --git a/docs/blog/posts/intro-sedonadb.md b/docs/blog/posts/intro-sedonadb.md
index a03208f443..e86fa41018 100644
--- a/docs/blog/posts/intro-sedonadb.md
+++ b/docs/blog/posts/intro-sedonadb.md
@@ -111,8 +111,7 @@ And here are a few rows from the countries table:
Here’s how to perform a spatial join to compute the country of each city:
```python
-sd.sql(
- """
+sd.sql("""
select
cities.name as city_name,
countries.name as country_name,
@@ -120,8 +119,7 @@ select
from cities
join countries
where ST_Intersects(cities.geometry, countries.geometry)
-"""
-).show(3)
+""").show(3)
```
The code utilizes `ST_Intersects` to determine if a city is contained within a given country.
@@ -261,13 +259,11 @@ Let’s expose these two tables as views and run a spatial join to see how many
buildings.to_view("buildings", overwrite=True)
vermont.to_view("vermont", overwrite=True)
-sd.sql(
- """
+sd.sql("""
select count(*) from buildings
join vermont
where ST_Intersects(buildings.geometry, vermont.geometry)
-"""
-).show()
+""").show()
```
This command correctly errors out because the tables have different CRSs. For safety, SedonaDB errors out rather than give you the wrong answer! Here's the error message that's easy to debug:
@@ -282,13 +278,11 @@ Use ST_Transform() or ST_SetSRID() to ensure arguments are compatible.
Let’s rewrite the spatial join to convert the `vermont` CRS to EPSG:4326, so it’s compatible with the `buildings` CRS.
```python
-sd.sql(
- """
+sd.sql("""
select count(*) from buildings
join vermont
where ST_Intersects(buildings.geometry, ST_Transform(vermont.geometry, 'EPSG:4326'))
-"""
-).show()
+""").show()
```
We now get the correct result!
diff --git a/docs/sedonaspark.md b/docs/sedonaspark.md
index 3d67573c5e..0b681ba1f8 100644
--- a/docs/sedonaspark.md
+++ b/docs/sedonaspark.md
@@ -33,14 +33,12 @@ SedonaSpark extends Apache Spark with a rich set of out-of-the-box distributed S
=== "PySpark"
```python
- sedona.sql(
- """
+ sedona.sql("""
SELECT superhero.name
FROM city, superhero
WHERE ST_Contains(city.geom, superhero.geom)
AND city.name = 'Gotham'
- """
- )
+ """)
```
=== "Java"
diff --git a/docs/setup/fabric.md b/docs/setup/fabric.md
index a76ca388c6..3592b61589 100644
--- a/docs/setup/fabric.md
+++ b/docs/setup/fabric.md
@@ -94,7 +94,6 @@ You can verify the installation by running the following code in the notebook.
```python
from sedona.spark import *
-
sedona = SedonaContext.create(spark)
diff --git a/docs/tutorial/concepts/spatial-joins.md b/docs/tutorial/concepts/spatial-joins.md
index 68caec5ce3..7ec0a010c2 100644
--- a/docs/tutorial/concepts/spatial-joins.md
+++ b/docs/tutorial/concepts/spatial-joins.md
@@ -164,15 +164,13 @@ Here is the content of the lines table:
Here’s a join that matches any touching values:
```python
-sedona.sql(
- """
+sedona.sql("""
SELECT
lines.id as line_id,
polygons.id as polygon_id
FROM lines
LEFT JOIN polygons ON ST_Touches(lines.geometry, polygons.geometry);
-"""
-).show()
+""").show()
```
Here’s the result of the join:
diff --git a/docs/tutorial/files/geoparquet-sedona-spark.md b/docs/tutorial/files/geoparquet-sedona-spark.md
index d5412cdff8..bbe8273b3e 100644
--- a/docs/tutorial/files/geoparquet-sedona-spark.md
+++ b/docs/tutorial/files/geoparquet-sedona-spark.md
@@ -263,13 +263,11 @@ Here is the query:
```python
my_shape = "POLYGON((4.0 3.5, 4.0 6.0, 8.0 6.0, 8.0 4.5, 4.0 3.5))"
-res = sedona.sql(
- f"""
+res = sedona.sql(f"""
select *
from points
where st_intersects(geometry, ST_GeomFromWKT('{my_shape}'))
-"""
-)
+""")
res.show(truncate=False)
```
diff --git a/docs/usecases/ApacheSedonaRaster.ipynb b/docs/usecases/ApacheSedonaRaster.ipynb
index eff186cf09..985d645bb1 100644
--- a/docs/usecases/ApacheSedonaRaster.ipynb
+++ b/docs/usecases/ApacheSedonaRaster.ipynb
@@ -311,22 +311,22 @@
"metadata": {},
"outputs": [],
"source": [
- "(width, height) = sedona.sql(\n",
+ "width, height = sedona.sql(\n",
" \"SELECT RS_Width(raster) as width, RS_Height(raster) as height from
raster_table\"\n",
").first()\n",
- "(p1X, p1Y) = sedona.sql(\n",
+ "p1X, p1Y = sedona.sql(\n",
" f\"SELECT RS_RasterToWorldCoordX(raster, {width / 2}, {height / 2})
\\\n",
" as pX, RS_RasterToWorldCoordY(raster, {width / 2},
{height / 2}) as pY from raster_table\"\n",
").first()\n",
- "(p2X, p2Y) = sedona.sql(\n",
+ "p2X, p2Y = sedona.sql(\n",
" f\"SELECT RS_RasterToWorldCoordX(raster, {(width / 2) + 2}, {height /
2}) \\\n",
" as pX, RS_RasterToWorldCoordY(raster, {(width / 2) +
2}, {height / 2}) as pY from raster_table\"\n",
").first()\n",
- "(p3X, p3Y) = sedona.sql(\n",
+ "p3X, p3Y = sedona.sql(\n",
" f\"SELECT RS_RasterToWorldCoordX(raster, {width / 2}, {(height / 2) +
2}) \\\n",
" as pX, RS_RasterToWorldCoordY(raster, {width / 2},
{(height / 2) + 2}) as pY from raster_table\"\n",
").first()\n",
- "(p4X, p4Y) = sedona.sql(\n",
+ "p4X, p4Y = sedona.sql(\n",
" f\"SELECT RS_RasterToWorldCoordX(raster, {(width / 2) + 2}, {(height
/ 2) + 2}) \\\n",
" as pX, RS_RasterToWorldCoordY(raster, {(width / 2) +
2}, {(height / 2) + 2}) as pY from raster_table\"\n",
").first()"
diff --git a/docs/usecases/ApacheSedonaSQL.ipynb b/docs/usecases/ApacheSedonaSQL.ipynb
index 108b39ca2b..2b56e65cba 100644
--- a/docs/usecases/ApacheSedonaSQL.ipynb
+++ b/docs/usecases/ApacheSedonaSQL.ipynb
@@ -550,16 +550,14 @@
"metadata": {},
"outputs": [],
"source": [
- "transformed_df = sedona.sql(\n",
- " \"\"\"\n",
+ "transformed_df = sedona.sql(\"\"\"\n",
" SELECT osm_id,\n",
" code,\n",
" fclass,\n",
" name,\n",
" ST_Transform(geometry, 'epsg:4326', 'epsg:2180') as geom
\n",
" FROM points\n",
- " \"\"\"\n",
- ")"
+ " \"\"\")"
]
},
{
@@ -618,15 +616,13 @@
"metadata": {},
"outputs": [],
"source": [
- "neighbours_within_1000m = sedona.sql(\n",
- " \"\"\"\n",
+ "neighbours_within_1000m = sedona.sql(\"\"\"\n",
" SELECT a.osm_id AS id_1,\n",
" b.osm_id AS id_2,\n",
" a.geom \n",
" FROM points_2180 AS a, points_2180 AS b \n",
" WHERE ST_Distance(a.geom,b.geom) < 50\n",
- " \"\"\"\n",
- ")"
+ " \"\"\")"
]
},
{
diff --git a/docs/usecases/contrib/PostgresqlConnectionApacheSedona.ipynb b/docs/usecases/contrib/PostgresqlConnectionApacheSedona.ipynb
index 55bdfd52dc..3b97cd68a5 100644
--- a/docs/usecases/contrib/PostgresqlConnectionApacheSedona.ipynb
+++ b/docs/usecases/contrib/PostgresqlConnectionApacheSedona.ipynb
@@ -200,7 +200,6 @@
"source": [
"import psycopg2\n",
"\n",
- "\n",
"connection = psycopg2.connect(\n",
" user=properties[\"user\"],\n",
" password=properties[\"password\"],\n",
diff --git a/python/sedona/spark/geopandas/base.py b/python/sedona/spark/geopandas/base.py
index 1bd3ffabba..83644d0035 100644
--- a/python/sedona/spark/geopandas/base.py
+++ b/python/sedona/spark/geopandas/base.py
@@ -18,6 +18,7 @@
"""
A base class of Sedona/Spark DataFrame/Column to behave like geopandas
GeoDataFrame/GeoSeries.
"""
+
from abc import ABCMeta, abstractmethod
from typing import (
Any,
diff --git a/python/sedona/spark/geopandas/geoseries.py b/python/sedona/spark/geopandas/geoseries.py
index b4e300f4f7..af9d0f378a 100644
--- a/python/sedona/spark/geopandas/geoseries.py
+++ b/python/sedona/spark/geopandas/geoseries.py
@@ -55,7 +55,6 @@ from pyspark.pandas.internal import (
SPARK_DEFAULT_SERIES_NAME, # '0'
)
-
# ============================================================================
# IMPLEMENTATION STATUS TRACKING
# ============================================================================
diff --git a/python/sedona/spark/raster/sedona_raster.py b/python/sedona/spark/raster/sedona_raster.py
index 84836e8a1c..55e8cf2bb4 100644
--- a/python/sedona/spark/raster/sedona_raster.py
+++ b/python/sedona/spark/raster/sedona_raster.py
@@ -37,7 +37,6 @@ except:
from .awt_raster import AWTRaster
from .meta import AffineTransform, SampleDimension
-
GDAL_VERSION = rasterio.env.GDALVersion.runtime()
diff --git a/python/sedona/spark/sql/functions.py b/python/sedona/spark/sql/functions.py
index 2420301d52..85bc9fef91 100644
--- a/python/sedona/spark/sql/functions.py
+++ b/python/sedona/spark/sql/functions.py
@@ -27,7 +27,6 @@ from pyspark.sql.udf import UserDefinedFunction
from pyspark.sql.types import DataType
from shapely.geometry.base import BaseGeometry
-
SEDONA_SCALAR_EVAL_TYPE = 5200
SEDONA_PANDAS_ARROW_NAME = "SedonaPandasArrowUDF"
diff --git a/python/sedona/spark/stats/clustering/dbscan.py b/python/sedona/spark/stats/clustering/dbscan.py
index d1bca47c65..640f5328e7 100644
--- a/python/sedona/spark/stats/clustering/dbscan.py
+++ b/python/sedona/spark/stats/clustering/dbscan.py
@@ -20,6 +20,7 @@
It identifies groups of data where enough records are close enough to each
other. This implementation leverages spark,
sedona and graphframes to support large scale datasets and various,
heterogeneous geometric feature types.
"""
+
from typing import Optional
from pyspark.sql import DataFrame, SparkSession
diff --git a/python/sedona/spark/stats/outlier_detection/local_outlier_factor.py b/python/sedona/spark/stats/outlier_detection/local_outlier_factor.py
index cbc85ac3f5..c6309154fb 100644
--- a/python/sedona/spark/stats/outlier_detection/local_outlier_factor.py
+++ b/python/sedona/spark/stats/outlier_detection/local_outlier_factor.py
@@ -16,6 +16,7 @@
# under the License.
"""Functions related to calculating the local outlier factor of a dataset."""
+
from typing import Optional
from pyspark.sql import DataFrame, SparkSession
diff --git a/python/tests/flink/conftest.py b/python/tests/flink/conftest.py
index 44be77b8f2..7205f2b0cc 100644
--- a/python/tests/flink/conftest.py
+++ b/python/tests/flink/conftest.py
@@ -19,7 +19,6 @@ import os
import pytest
-
EXTRA_JARS = os.getenv("SEDONA_PYFLINK_EXTRA_JARS")
diff --git a/python/tests/geopandas/test_geodataframe.py b/python/tests/geopandas/test_geodataframe.py
index d3963f0d66..e5d4fae9c5 100644
--- a/python/tests/geopandas/test_geodataframe.py
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -540,11 +540,13 @@ class TestGeoDataFrame(TestGeopandasBase):
assert obj["crs"]["type"] == "name"
assert obj["crs"]["properties"]["name"] == "urn:ogc:def:crs:EPSG::3857"
- expected = '{"type": "FeatureCollection", "features": [{"id": "0",
"type": "Feature", \
+ expected = (
+ '{"type": "FeatureCollection", "features": [{"id": "0", "type":
"Feature", \
"properties": {"col1": "name1"}, "geometry": {"type": "Point", "coordinates":
[1.0,\
2.0]}}, {"id": "1", "type": "Feature", "properties": {"col1": "name2"},
"geometry"\
: {"type": "Point", "coordinates": [2.0, 1.0]}}], "crs": {"type": "name",
"properti\
es": {"name": "urn:ogc:def:crs:EPSG::3857"}}}'
+ )
assert result == expected, f"Expected {expected}, but got {result}"
def test_to_arrow(self):
diff --git a/python/tests/serialization/test_deserializers.py b/python/tests/serialization/test_deserializers.py
index 8e4f8f7d69..a29ae3e45c 100644
--- a/python/tests/serialization/test_deserializers.py
+++ b/python/tests/serialization/test_deserializers.py
@@ -115,12 +115,10 @@ class TestGeometryConvert(TestBase):
)
def test_geometry_collection_deserialization(self):
- geom = self.spark.sql(
- """SELECT st_geomFromWKT('GEOMETRYCOLLECTION (
+ geom = self.spark.sql("""SELECT st_geomFromWKT('GEOMETRYCOLLECTION (
MULTILINESTRING((1 2, 3 4), (5 6, 7 8)),
MULTILINESTRING((1 2, 3 4), (5 6, 7 8), (9 10, 11 12)),
- POINT(10 20))') as geom"""
- ).collect()[0][0]
+ POINT(10 20))') as geom""").collect()[0][0]
assert type(geom) == GeometryCollection
assert (
diff --git a/python/tests/spatial_rdd/test_rectangle_rdd.py b/python/tests/spatial_rdd/test_rectangle_rdd.py
index 103a8b4458..8acb2d1d88 100644
--- a/python/tests/spatial_rdd/test_rectangle_rdd.py
+++ b/python/tests/spatial_rdd/test_rectangle_rdd.py
@@ -24,7 +24,6 @@ from sedona.spark.core.enums import FileDataSplitter, IndexType
from sedona.spark.core.geom.envelope import Envelope
from sedona.spark.core.SpatialRDD import RectangleRDD
-
inputLocation = os.path.join(tests_resource, "zcta510-small.csv")
queryWindowSet = os.path.join(tests_resource, "zcta510-small.csv")
offset = 0
diff --git a/python/tests/sql/test_aggregate_functions.py b/python/tests/sql/test_aggregate_functions.py
index 5ea7946e29..0b3f397ce2 100644
--- a/python/tests/sql/test_aggregate_functions.py
+++ b/python/tests/sql/test_aggregate_functions.py
@@ -73,15 +73,13 @@ class TestConstructors(TestBase):
assert union.take(1)[0][0].area == 10100
def test_st_collect_aggr_points(self):
- self.spark.sql(
- """
+ self.spark.sql("""
SELECT explode(array(
ST_GeomFromWKT('POINT(1 2)'),
ST_GeomFromWKT('POINT(3 4)'),
ST_GeomFromWKT('POINT(5 6)')
)) AS geom
- """
- ).createOrReplaceTempView("points_table")
+ """).createOrReplaceTempView("points_table")
result = self.spark.sql("SELECT ST_Collect_Agg(geom) FROM
points_table").take(
1
@@ -91,14 +89,12 @@ class TestConstructors(TestBase):
assert len(result.geoms) == 3
def test_st_collect_aggr_polygons(self):
- self.spark.sql(
- """
+ self.spark.sql("""
SELECT explode(array(
ST_GeomFromWKT('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'),
ST_GeomFromWKT('POLYGON((2 2, 3 2, 3 3, 2 3, 2 2))')
)) AS geom
- """
- ).createOrReplaceTempView("polygons_table")
+ """).createOrReplaceTempView("polygons_table")
result = self.spark.sql("SELECT ST_Collect_Agg(geom) FROM
polygons_table").take(
1
@@ -109,15 +105,13 @@ class TestConstructors(TestBase):
assert result.area == 2.0
def test_st_collect_aggr_mixed_types(self):
- self.spark.sql(
- """
+ self.spark.sql("""
SELECT explode(array(
ST_GeomFromWKT('POINT(1 2)'),
ST_GeomFromWKT('LINESTRING(0 0, 1 1)'),
ST_GeomFromWKT('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))')
)) AS geom
- """
- ).createOrReplaceTempView("mixed_geom_table")
+ """).createOrReplaceTempView("mixed_geom_table")
result = self.spark.sql(
"SELECT ST_Collect_Agg(geom) FROM mixed_geom_table"
@@ -128,14 +122,12 @@ class TestConstructors(TestBase):
def test_st_collect_aggr_preserves_duplicates(self):
# Test that ST_Collect_Agg keeps duplicate geometries (unlike ST_Union_Aggr)
- self.spark.sql(
- """
+ self.spark.sql("""
SELECT explode(array(
ST_GeomFromWKT('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'),
ST_GeomFromWKT('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))')
)) AS geom
- """
- ).createOrReplaceTempView("duplicate_polygons_table")
+ """).createOrReplaceTempView("duplicate_polygons_table")
result = self.spark.sql(
"SELECT ST_Collect_Agg(geom) FROM duplicate_polygons_table"
@@ -148,16 +140,14 @@ class TestConstructors(TestBase):
# Test aliases for *_Aggr functions with *_Agg suffix
def test_st_envelope_agg_alias(self):
- self.spark.sql(
- """
+ self.spark.sql("""
SELECT explode(array(
ST_GeomFromWKT('POINT(1.1 101.1)'),
ST_GeomFromWKT('POINT(1.1 1100.1)'),
ST_GeomFromWKT('POINT(1000.1 1100.1)'),
ST_GeomFromWKT('POINT(1000.1 101.1)')
)) AS arealandmark
- """
- ).createOrReplaceTempView("pointdf_alias")
+ """).createOrReplaceTempView("pointdf_alias")
boundary = self.spark.sql(
"SELECT ST_Envelope_Agg(pointdf_alias.arealandmark) FROM
pointdf_alias"
@@ -175,14 +165,12 @@ class TestConstructors(TestBase):
assert boundary.take(1)[0][0].equals(polygon)
def test_st_intersection_agg_alias(self):
- self.spark.sql(
- """
+ self.spark.sql("""
SELECT explode(array(
ST_GeomFromWKT('POLYGON((0 0, 4 0, 4 4, 0 4, 0 0))'),
ST_GeomFromWKT('POLYGON((2 2, 6 2, 6 6, 2 6, 2 2))')
)) AS countyshape
- """
- ).createOrReplaceTempView("polygondf_alias")
+ """).createOrReplaceTempView("polygondf_alias")
intersection = self.spark.sql(
"SELECT ST_Intersection_Agg(polygondf_alias.countyshape) FROM
polygondf_alias"
@@ -193,14 +181,12 @@ class TestConstructors(TestBase):
assert result.area == 4.0
def test_st_union_agg_alias(self):
- self.spark.sql(
- """
+ self.spark.sql("""
SELECT explode(array(
ST_GeomFromWKT('POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))'),
ST_GeomFromWKT('POLYGON((1 1, 3 1, 3 3, 1 3, 1 1))')
)) AS countyshape
- """
- ).createOrReplaceTempView("polygondf_union_alias")
+ """).createOrReplaceTempView("polygondf_union_alias")
union = self.spark.sql(
"SELECT ST_Union_Agg(polygondf_union_alias.countyshape) FROM
polygondf_union_alias"
diff --git a/python/tests/sql/test_function.py b/python/tests/sql/test_function.py
index 14844adb44..3b09b12b6a 100644
--- a/python/tests/sql/test_function.py
+++ b/python/tests/sql/test_function.py
@@ -2239,14 +2239,12 @@ class TestPredicateJoin(TestBase):
assert cell_ids is None
def test_st_s2_to_geom(self):
- df = self.spark.sql(
- """
+ df = self.spark.sql("""
SELECT
ST_Intersects(ST_GeomFromWKT('POLYGON ((0.1 0.1, 0.5 0.1, 1 0.3, 1 1, 0.1 1, 0.1 0.1))'), ST_S2ToGeom(ST_S2CellIDs(ST_GeomFromWKT('POLYGON ((0.1 0.1, 0.5 0.1, 1 0.3, 1 1, 0.1 1, 0.1 0.1))'), 10))[0]),
ST_Intersects(ST_GeomFromWKT('POLYGON ((0.1 0.1, 0.5 0.1, 1 0.3, 1 1, 0.1 1, 0.1 0.1))'), ST_S2ToGeom(ST_S2CellIDs(ST_GeomFromWKT('POLYGON ((0.1 0.1, 0.5 0.1, 1 0.3, 1 1, 0.1 1, 0.1 0.1))'), 10))[1]),
ST_Intersects(ST_GeomFromWKT('POLYGON ((0.1 0.1, 0.5 0.1, 1 0.3, 1 1, 0.1 1, 0.1 0.1))'), ST_S2ToGeom(ST_S2CellIDs(ST_GeomFromWKT('POLYGON ((0.1 0.1, 0.5 0.1, 1 0.3, 1 1, 0.1 1, 0.1 0.1))'), 10))[2])
- """
- )
+ """)
res1, res2, res3 = df.take(1)[0]
assert res1 and res2 and res3
@@ -2273,20 +2271,17 @@ class TestPredicateJoin(TestBase):
assert df.take(1)[0][0] == 78
def test_st_h3_kring(self):
- df = self.spark.sql(
- """
+ df = self.spark.sql("""
SELECT
ST_H3KRing(ST_H3CellIDs(ST_GeomFromWKT('POINT(1 2)'), 8, true)[0], 1, true) exactRings,
ST_H3KRing(ST_H3CellIDs(ST_GeomFromWKT('POINT(1 2)'), 8, true)[0], 1, false) allRings,
ST_H3CellIDs(ST_GeomFromWKT('POINT(1 2)'), 8, true) original_cells
- """
- )
+ """)
exact_rings, all_rings, original_cells = df.take(1)[0]
assert set(exact_rings + original_cells) == set(all_rings)
def test_st_h3_togeom(self):
- df = self.spark.sql(
- """
+ df = self.spark.sql("""
SELECT
ST_Intersects(
ST_H3ToGeom(ST_H3CellIDs(ST_GeomFromText('POLYGON((-1 0, 1 0, 0 0, 0 1, -1 0))'), 6, true))[10],
@@ -2300,8 +2295,7 @@ class TestPredicateJoin(TestBase):
ST_H3ToGeom(ST_H3CellIDs(ST_GeomFromText('POLYGON((-1 0, 1 0, 0 0, 0 1, -1 0))'), 6, false))[50],
ST_GeomFromText('POLYGON((-1 0, 1 0, 0 0, 0 1, -1 0))')
)
- """
- )
+ """)
res1, res2, res3 = df.take(1)[0]
assert res1 and res2 and res3