This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 8f00164 feat: Pytest benchmark for comparing against other engines
locally (#10)
8f00164 is described below
commit 8f001641c83e3a20703214b32765e14c69d9c643
Author: Peter Nguyen <[email protected]>
AuthorDate: Wed Sep 3 11:08:13 2025 -0700
feat: Pytest benchmark for comparing against other engines locally (#10)
* Initial support
* Create tables of geometry columns in advance and use them
* move benchmarks folder from python to root folder
* Parametrize st_area as an example
* Use 'simple' and 'complex' tables instead of different sized columns
* Parametrize rest of functions
* Add licenses
* Add benchmarks/README.md
* Update README.md
---
benchmarks/README.md | 77 +++++++++++++++++++++++++++
benchmarks/test_bench_base.py | 94 +++++++++++++++++++++++++++++++++
benchmarks/test_distance.py | 37 +++++++++++++
benchmarks/test_functions.py | 117 ++++++++++++++++++++++++++++++++++++++++++
benchmarks/test_overlay.py | 37 +++++++++++++
benchmarks/test_predicates.py | 55 ++++++++++++++++++++
6 files changed, 417 insertions(+)
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000..6d6633b
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,77 @@
+<!-- Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License. -->
+
+# Running Benchmarks
+
+## pytest-benchmark
+
+These benchmarks provide a convenient way to compare the performance of queries on SedonaDB against other engines such as DuckDB and PostGIS.
+
+### Setup
+
+Install pytest-benchmark:
+```bash
+pip install pytest-benchmark
+```
+
+### Running benchmarks
+
+The commands below assume your working directory is `benchmarks`.
+
+```bash
+cd benchmarks/
+```
+
+To run a benchmark, run the corresponding test function. For example, to run the benchmarks for `st_buffer`:
+
+```bash
+pytest test_functions.py::TestBenchFunctions::test_st_buffer
+```
+
+Most of the time, you'll also want to group results by `param:table` or `func` (function), for example with the `--benchmark-group-by=param:table` flag. Within each group, pytest-benchmark highlights the "best" value of each column in green (e.g., fastest for median, lowest for stddev) and the "worst" value in red.
+
+```bash
+pytest --benchmark-group-by=param:table test_functions.py::TestBenchFunctions::test_st_buffer
+```
+
+You can also reduce the number of columns displayed by using the `--benchmark-columns` flag.
+
+```bash
+pytest --benchmark-group-by=param:table --benchmark-columns=median,mean,stddev test_functions.py::TestBenchFunctions::test_st_buffer
+```
+
+Example output of the last command:
+
+```
+----------------------------- benchmark 'table=collections_complex': 3 tests -----------------------------
+Name (time in ms)                                     Median                 Mean               StdDev
+----------------------------------------------------------------------------------------------------------
+test_st_buffer[collections_complex-SedonaDB]         87.0095 (1.0)        87.7874 (1.0)         3.7269 (1.0)
+test_st_buffer[collections_complex-DuckDB]          440.4810 (5.06)      444.6948 (5.07)       12.1143 (3.25)
+test_st_buffer[collections_complex-PostGIS]         864.5841 (9.94)      883.3661 (10.06)      50.4996 (13.55)
+----------------------------------------------------------------------------------------------------------
+
+---------------------------- benchmark 'table=collections_simple': 3 tests -----------------------------
+Name (time in ms)                                    Median                 Mean               StdDev
+--------------------------------------------------------------------------------------------------------
+test_st_buffer[collections_simple-SedonaDB]         85.8510 (1.0)        86.5050 (1.0)         3.8481 (1.0)
+test_st_buffer[collections_simple-DuckDB]          442.6664 (5.16)      444.5187 (5.14)        5.6186 (1.46)
+test_st_buffer[collections_simple-PostGIS]         855.3329 (9.96)      854.7194 (9.88)        7.6190 (1.98)
+--------------------------------------------------------------------------------------------------------
+```
+
+For more details and command line options, refer to the official [pytest-benchmark documentation](https://pytest-benchmark.readthedocs.io/en/latest/usage.html).
diff --git a/benchmarks/test_bench_base.py b/benchmarks/test_bench_base.py
new file mode 100644
index 0000000..8e62f35
--- /dev/null
+++ b/benchmarks/test_bench_base.py
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import json
+from sedonadb.testing import DuckDB, PostGIS, SedonaDB
+
+
+class TestBenchBase:
+ def setup_class(self):
+ self.sedonadb = SedonaDB.create_or_skip()
+ self.postgis = PostGIS.create_or_skip()
+ self.duckdb = DuckDB.create_or_skip()
+
+ num_geoms = 100_000
+
+ # Setup tables
+ for name, options in [
+ (
+ "segments_large",
+ {
+ "geom_type": "LineString",
+ "target_rows": num_geoms,
+ "vertices_per_linestring_range": [2, 2],
+ },
+ ),
+ (
+ "polygons_simple",
+ {
+ "geom_type": "Polygon",
+ "target_rows": num_geoms,
+ "vertices_per_linestring_range": [10, 10],
+ },
+ ),
+ (
+ "polygons_complex",
+ {
+ "geom_type": "Polygon",
+ "target_rows": num_geoms,
+ "vertices_per_linestring_range": [500, 500],
+ },
+ ),
+ (
+ "collections_simple",
+ {
+ "geom_type": "GeometryCollection",
+ "target_rows": num_geoms,
+ "vertices_per_linestring_range": [10, 10],
+ },
+ ),
+ (
+ "collections_complex",
+ {
+ "geom_type": "GeometryCollection",
+ "target_rows": num_geoms,
+ "vertices_per_linestring_range": [500, 500],
+ },
+ ),
+ ]:
+ # Generate synthetic data
+ query = f"""
+ SELECT
+ geometry as geom1,
+ geometry as geom2,
+ round(random() * 100) as integer
+ FROM sd_random_geometry('{json.dumps(options)}')
+ """
+ tab = self.sedonadb.execute_and_collect(query)
+
+ self.sedonadb.create_table_arrow(name, tab)
+ self.postgis.create_table_arrow(name, tab)
+ self.duckdb.create_table_arrow(name, tab)
+
+ def _get_eng(self, eng):
+ if eng == SedonaDB:
+ return self.sedonadb
+ elif eng == PostGIS:
+ return self.postgis
+ elif eng == DuckDB:
+ return self.duckdb
+ else:
+ raise ValueError(f"Unsupported engine: {eng}")
diff --git a/benchmarks/test_distance.py b/benchmarks/test_distance.py
new file mode 100644
index 0000000..5744c01
--- /dev/null
+++ b/benchmarks/test_distance.py
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+from test_bench_base import TestBenchBase
+from sedonadb.testing import DuckDB, PostGIS, SedonaDB
+
+
+class TestBenchPredicates(TestBenchBase):
+ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "polygons_simple",
+ "polygons_complex",
+ ],
+ )
+ def test_st_distance(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_Distance(geom1, geom2) from
{table}")
+
+ benchmark(queries)
diff --git a/benchmarks/test_functions.py b/benchmarks/test_functions.py
new file mode 100644
index 0000000..8be56d7
--- /dev/null
+++ b/benchmarks/test_functions.py
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+from test_bench_base import TestBenchBase
+from sedonadb.testing import DuckDB, PostGIS, SedonaDB
+
+
+class TestBenchFunctions(TestBenchBase):
+ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "polygons_simple",
+ "polygons_complex",
+ ],
+ )
+ def test_st_area(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_Area(geom1) from {table}")
+
+ benchmark(queries)
+
+ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "collections_complex",
+ ],
+ )
+ def test_st_buffer(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_Buffer(geom1, 2.0) from
{table}")
+
+ benchmark(queries)
+
+ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "polygons_simple",
+ "polygons_complex",
+ ],
+ )
+ def test_st_centroid(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_Centroid(geom1) from {table}")
+
+ benchmark(queries)
+
+ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "collections_complex",
+ ],
+ )
+ def test_st_dimension(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_Dimension(geom1) from {table}")
+
+ benchmark(queries)
+
+ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "collections_complex",
+ ],
+ )
+ def test_st_envelope(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_Envelope(geom1) from {table}")
+
+ benchmark(queries)
+
+ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "collections_complex",
+ ],
+ )
+ def test_st_geometrytype(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_GeometryType(geom1) from
{table}")
+
+ benchmark(queries)
diff --git a/benchmarks/test_overlay.py b/benchmarks/test_overlay.py
new file mode 100644
index 0000000..cc8a513
--- /dev/null
+++ b/benchmarks/test_overlay.py
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+from test_bench_base import TestBenchBase
+from sedonadb.testing import DuckDB, PostGIS, SedonaDB
+
+
+class TestBenchPredicates(TestBenchBase):
+ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "polygons_simple",
+ "polygons_complex",
+ ],
+ )
+ def test_st_difference(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_Difference(geom1, geom2) from
{table}")
+
+ benchmark(queries)
diff --git a/benchmarks/test_predicates.py b/benchmarks/test_predicates.py
new file mode 100644
index 0000000..cd8b871
--- /dev/null
+++ b/benchmarks/test_predicates.py
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+from test_bench_base import TestBenchBase
+from sedonadb.testing import DuckDB, PostGIS, SedonaDB
+
+
+class TestBenchPredicates(TestBenchBase):
+ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "polygons_simple",
+ "polygons_complex",
+ ],
+ )
+ def test_st_contains(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_Contains(geom1, geom2) from
{table}")
+
+ benchmark(queries)
+
+ @pytest.mark.parametrize("eng", [SedonaDB, PostGIS, DuckDB])
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "polygons_simple",
+ "polygons_complex",
+ ],
+ )
+ def test_st_dwithin(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(
+ f"SELECT ST_DWithin(geom1, geom2, 1.0) from {table}"
+ )
+
+ benchmark(queries)