This is an automated email from the ASF dual-hosted git repository.
petern pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 1954402 feat(sedona-functions): Implement native ST_NumGeometries
using WKBHeader (#270)
1954402 is described below
commit 195440292cf9cfe66ba5a49682f2bef84382c1dc
Author: Peter Nguyen <[email protected]>
AuthorDate: Sun Nov 2 20:00:58 2025 -0800
feat(sedona-functions): Implement native ST_NumGeometries using WKBHeader
(#270)
Co-authored-by: Dewey Dunnington <[email protected]>
---
benchmarks/test_functions.py | 18 +++
python/sedonadb/tests/functions/test_functions.py | 31 ++++
rust/sedona-functions/benches/native-functions.rs | 3 +
rust/sedona-functions/src/lib.rs | 1 +
rust/sedona-functions/src/register.rs | 1 +
rust/sedona-functions/src/st_numgeometries.rs | 177 ++++++++++++++++++++++
rust/sedona-geometry/src/wkb_header.rs | 57 +++++++
7 files changed, 288 insertions(+)
diff --git a/benchmarks/test_functions.py b/benchmarks/test_functions.py
index 3e2ae04..e609039 100644
--- a/benchmarks/test_functions.py
+++ b/benchmarks/test_functions.py
@@ -239,6 +239,24 @@ class TestBenchFunctions(TestBenchBase):
benchmark(queries)
+ @pytest.mark.parametrize(
+ "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
+ )
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "collections_complex",
+ ],
+ )
+ def test_st_numgeometries(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_NumGeometries(geom1) from {table}")
+
+ benchmark(queries)
+
@pytest.mark.parametrize(
"eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
)
diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py
index 1420d6b..a30f783 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -1071,6 +1071,37 @@ def test_st_length(eng, geom, expected):
eng.assert_query_result(f"SELECT ST_Length({geom_or_null(geom)})", expected)
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+ ("geom", "expected"),
+ [
+ (None, None),
+ ("POINT EMPTY", 0),
+ ("LINESTRING EMPTY", 0),
+ ("POLYGON EMPTY", 0),
+ ("MULTIPOINT EMPTY", 0),
+ ("MULTILINESTRING EMPTY", 0),
+ ("MULTIPOLYGON EMPTY", 0),
+ ("GEOMETRYCOLLECTION EMPTY", 0),
+ ("GEOMETRYCOLLECTION (LINESTRING EMPTY, MULTIPOINT ((0 0), (1 1), (2 2)))", 2),
+ ("POINT(0 0)", 1),
+ ("LINESTRING(0 0, 1 1)", 1),
+ ("POLYGON((0 0, 1 0, 0 1, 0 0))", 1),
+ ("MULTIPOINT ((0 0), (1 1), (2 2))", 3),
+ ("MULTILINESTRING((0 0, 0 1, 1 1, 0 0), (0 0, 1 1))", 2),
+ ("MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((0 0, 1 0, 1 1, 0 1, 0 0)))", 2),
+ ("GEOMETRYCOLLECTION (MULTIPOINT ((0 0), (1 1), (2 2)))", 1),
+ (
+ "GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT (0 0), POINT (1 1)), MULTIPOINT((2 2), (3 3)))",
+ 2,
+ ),
+ ],
+)
+def test_st_numgeometries(eng, geom, expected):
+ eng = eng.create_or_skip()
+ eng.assert_query_result(f"SELECT ST_NumGeometries({geom_or_null(geom)})", expected)
+
+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "expected"),
diff --git a/rust/sedona-functions/benches/native-functions.rs b/rust/sedona-functions/benches/native-functions.rs
index 8a89aa9..8061130 100644
--- a/rust/sedona-functions/benches/native-functions.rs
+++ b/rust/sedona-functions/benches/native-functions.rs
@@ -99,6 +99,9 @@ fn criterion_benchmark(c: &mut Criterion) {
BenchmarkArgs::ArrayArray(Point, Point),
);
+ benchmark::scalar(c, &f, "native", "st_numgeometries", Point);
+ benchmark::scalar(c, &f, "native", "st_numgeometries", MultiPoint(10));
+
benchmark::scalar(
c,
&f,
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index 2c87924..ed93709 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -45,6 +45,7 @@ mod st_iscollection;
pub mod st_isempty;
mod st_length;
mod st_makeline;
+mod st_numgeometries;
mod st_perimeter;
mod st_point;
mod st_pointn;
diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs
index 2678a85..7069fcb 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -83,6 +83,7 @@ pub fn default_function_set() -> FunctionSet {
crate::st_isempty::st_isempty_udf,
crate::st_length::st_length_udf,
crate::st_makeline::st_makeline_udf,
+ crate::st_numgeometries::st_numgeometries_udf,
crate::st_perimeter::st_perimeter_udf,
crate::st_point::st_geogpoint_udf,
crate::st_point::st_point_udf,
diff --git a/rust/sedona-functions/src/st_numgeometries.rs b/rust/sedona-functions/src/st_numgeometries.rs
new file mode 100644
index 0000000..c6574f0
--- /dev/null
+++ b/rust/sedona-functions/src/st_numgeometries.rs
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::builder::UInt32Builder;
+use arrow_schema::DataType;
+use datafusion_common::error::{DataFusionError, Result};
+use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::types::GeometryTypeId;
+use sedona_geometry::wkb_header::WkbHeader;
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+
+use crate::executor::WkbBytesExecutor;
+
+pub fn st_numgeometries_udf() -> SedonaScalarUDF {
+ SedonaScalarUDF::new(
+ "st_numgeometries",
+ vec![Arc::new(STNumGeometries {})],
+ Volatility::Immutable,
+ Some(st_numgeometries_doc()),
+ )
+}
+
+fn st_numgeometries_doc() -> Documentation {
+ Documentation::builder(
+ DOC_SECTION_OTHER,
+ "Return the number of geometries in the geometry collection",
+ "ST_NumGeometries (A: Geometry)",
+ )
+ .with_argument("geom", "geometry: Input geometry")
+ .with_sql_example("SELECT ST_NumGeometries(ST_GeomFromWKT('GEOMETRYCOLLECTION(POINT(0 0), LINESTRING(0 0, 1 1))'))")
+ .build()
+}
+
+#[derive(Debug)]
+struct STNumGeometries {}
+
+impl SedonaScalarKernel for STNumGeometries {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(
+ vec![ArgMatcher::is_geometry()],
+ SedonaType::Arrow(DataType::UInt32),
+ );
+
+ matcher.match_args(args)
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[datafusion_expr::ColumnarValue],
+ ) -> Result<datafusion_expr::ColumnarValue> {
+ let executor = WkbBytesExecutor::new(arg_types, args);
+ let mut builder = UInt32Builder::with_capacity(executor.num_iterations());
+
+ executor.execute_wkb_void(|maybe_item| {
+ match maybe_item {
+ Some(item) => {
+ builder.append_value(invoke_scalar(item)?);
+ }
+ None => builder.append_null(),
+ }
+ Ok(())
+ })?;
+
+ executor.finish(Arc::new(builder.finish()))
+ }
+}
+
+fn invoke_scalar(buf: &[u8]) -> Result<u32> {
+ let header = WkbHeader::try_new(buf).map_err(|e| DataFusionError::External(Box::new(e)))?;
+
+ let size = header.size();
+ let is_empty = header
+ .is_empty()
+ .map_err(|e| DataFusionError::External(Box::new(e)))?;
+ if is_empty {
+ return Ok(0);
+ }
+
+ let geometry_type = header
+ .geometry_type_id()
+ .map_err(|e| DataFusionError::External(Box::new(e)))?;
+ match geometry_type {
+ // Return 1 for these, since they are non-empty
+ GeometryTypeId::Point | GeometryTypeId::LineString | GeometryTypeId::Polygon => Ok(1),
+ GeometryTypeId::MultiPoint
+ | GeometryTypeId::MultiLineString
+ | GeometryTypeId::MultiPolygon
+ | GeometryTypeId::GeometryCollection => Ok(size),
+ _ => sedona_internal_err!("Invalid geometry type"),
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use arrow_array::{create_array as arrow_array, ArrayRef};
+ use datafusion_common::ScalarValue;
+ use datafusion_expr::ScalarUDF;
+ use rstest::rstest;
+ use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
+ use sedona_testing::{compare::assert_array_equal, testers::ScalarUdfTester};
+
+ use super::*;
+
+ #[test]
+ fn udf_metadata() {
+ let udf: ScalarUDF = st_numgeometries_udf().into();
+ assert_eq!(udf.name(), "st_numgeometries");
+ assert!(udf.documentation().is_some());
+ }
+
+ #[rstest]
+ fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
+ let tester = ScalarUdfTester::new(st_numgeometries_udf().into(), vec![sedona_type.clone()]);
+
+ tester.assert_return_type(DataType::UInt32);
+
+ let result = tester
+ .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION(POINT(0 0), LINESTRING(0 0, 1 1))"))
+ .unwrap();
+ tester.assert_scalar_result_equals(result, ScalarValue::UInt32(Some(2)));
+
+ let result = tester.invoke_wkb_scalar(None).unwrap();
+ tester.assert_scalar_result_equals(result, ScalarValue::Null);
+
+ let input_wkt = vec![
+ None,
+ Some("POINT EMPTY"),
+ Some("MULTIPOLYGON EMPTY"),
+ Some("POINT(0 0)"),
+ Some("LINESTRING(0 0, 1 1)"),
+ Some("POLYGON((0 0, 1 0, 0 1, 0 0))"),
+ Some("MULTIPOINT((0 0), (1 1))"),
+ Some("MULTILINESTRING((0 0, 0 1, 1 1, 0 0),(0 0, 1 1))"),
+ Some("MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((0 0, 1 0, 1 1, 0 1, 0 0)))"),
+ Some("GEOMETRYCOLLECTION EMPTY"),
+ Some("GEOMETRYCOLLECTION(POINT EMPTY, LINESTRING(0 0, 1 1))"),
+ Some("GEOMETRYCOLLECTION(POINT(0 0), MULTIPOINT((0 0), (1 1)))"),
+ ];
+ let expected: ArrayRef = arrow_array!(
+ UInt32,
+ [
+ None,
+ Some(0),
+ Some(0),
+ Some(1),
+ Some(1),
+ Some(1),
+ Some(2),
+ Some(2),
+ Some(2),
+ Some(0),
+ Some(2),
+ Some(2)
+ ]
+ );
+ assert_array_equal(&tester.invoke_wkb_array(input_wkt).unwrap(), &expected);
+ }
+}
diff --git a/rust/sedona-geometry/src/wkb_header.rs b/rust/sedona-geometry/src/wkb_header.rs
index e5179b2..b1cdfca 100644
--- a/rust/sedona-geometry/src/wkb_header.rs
+++ b/rust/sedona-geometry/src/wkb_header.rs
@@ -140,6 +140,16 @@ impl WkbHeader {
pub fn first_geom_dimensions(&self) -> Option<Dimensions> {
self.first_geom_dimensions
}
+
+ /// Returns true if this geometry is EMPTY or false otherwise
+ pub fn is_empty(&self) -> Result<bool, SedonaGeometryError> {
+ let geometry_type_id = self.geometry_type_id()?;
+ if geometry_type_id == GeometryTypeId::Point {
+ let (x, y) = self.first_xy();
+ return Ok(x.is_nan() && y.is_nan());
+ }
+ Ok(self.size == 0)
+ }
}
// A helper struct for calculating the WKBHeader
@@ -1002,4 +1012,51 @@ mod tests {
assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyzm);
}
+
+ #[test]
+ fn is_empty() {
+ let wkb = make_wkb("POINT EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+
+ let wkb = make_wkb("POINT Z EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+
+ let wkb = make_wkb("LINESTRING EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+
+ let wkb = make_wkb("POLYGON EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+
+ let wkb = make_wkb("MULTIPOINT EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+
+ let wkb = make_wkb("MULTILINESTRING EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+
+ let wkb = make_wkb("MULTIPOLYGON EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION Z EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION M EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+
+ let wkb = make_wkb("GEOMETRYCOLLECTION ZM EMPTY");
+ let header = WkbHeader::try_new(&wkb).unwrap();
+ assert!(header.is_empty().unwrap());
+ }
}