This is an automated email from the ASF dual-hosted git repository.

petern pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 1954402  feat(sedona-functions): Implement native ST_NumGeometries using WKBHeader (#270)
1954402 is described below

commit 195440292cf9cfe66ba5a49682f2bef84382c1dc
Author: Peter Nguyen <[email protected]>
AuthorDate: Sun Nov 2 20:00:58 2025 -0800

    feat(sedona-functions): Implement native ST_NumGeometries using WKBHeader (#270)
    
    Co-authored-by: Dewey Dunnington <[email protected]>
---
 benchmarks/test_functions.py                      |  18 +++
 python/sedonadb/tests/functions/test_functions.py |  31 ++++
 rust/sedona-functions/benches/native-functions.rs |   3 +
 rust/sedona-functions/src/lib.rs                  |   1 +
 rust/sedona-functions/src/register.rs             |   1 +
 rust/sedona-functions/src/st_numgeometries.rs     | 177 ++++++++++++++++++++++
 rust/sedona-geometry/src/wkb_header.rs            |  57 +++++++
 7 files changed, 288 insertions(+)

diff --git a/benchmarks/test_functions.py b/benchmarks/test_functions.py
index 3e2ae04..e609039 100644
--- a/benchmarks/test_functions.py
+++ b/benchmarks/test_functions.py
@@ -239,6 +239,24 @@ class TestBenchFunctions(TestBenchBase):
 
         benchmark(queries)
 
+    @pytest.mark.parametrize(
+        "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
+    )
+    @pytest.mark.parametrize(
+        "table",
+        [
+            "collections_simple",
+            "collections_complex",
+        ],
+    )
+    def test_st_numgeometries(self, benchmark, eng, table):
+        eng = self._get_eng(eng)
+
+        def queries():
+            eng.execute_and_collect(f"SELECT ST_NumGeometries(geom1) from 
{table}")
+
+        benchmark(queries)
+
     @pytest.mark.parametrize(
         "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
     )
diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py
index 1420d6b..a30f783 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -1071,6 +1071,37 @@ def test_st_length(eng, geom, expected):
     eng.assert_query_result(f"SELECT ST_Length({geom_or_null(geom)})", 
expected)
 
 
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom", "expected"),
+    [
+        (None, None),
+        ("POINT EMPTY", 0),
+        ("LINESTRING EMPTY", 0),
+        ("POLYGON EMPTY", 0),
+        ("MULTIPOINT EMPTY", 0),
+        ("MULTILINESTRING EMPTY", 0),
+        ("MULTIPOLYGON EMPTY", 0),
+        ("GEOMETRYCOLLECTION EMPTY", 0),
+        ("GEOMETRYCOLLECTION (LINESTRING EMPTY, MULTIPOINT ((0 0), (1 1), (2 
2)))", 2),
+        ("POINT(0 0)", 1),
+        ("LINESTRING(0 0, 1 1)", 1),
+        ("POLYGON((0 0, 1 0, 0 1, 0 0))", 1),
+        ("MULTIPOINT ((0 0), (1 1), (2 2))", 3),
+        ("MULTILINESTRING((0 0, 0 1, 1 1, 0 0), (0 0, 1 1))", 2),
+        ("MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((0 0, 1 0, 1 1, 0 1, 0 
0)))", 2),
+        ("GEOMETRYCOLLECTION (MULTIPOINT ((0 0), (1 1), (2 2)))", 1),
+        (
+            "GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT (0 0), POINT (1 
1)), MULTIPOINT((2 2), (3 3)))",
+            2,
+        ),
+    ],
+)
+def test_st_numgeometries(eng, geom, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(f"SELECT ST_NumGeometries({geom_or_null(geom)})", 
expected)
+
+
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 @pytest.mark.parametrize(
     ("geom", "expected"),
diff --git a/rust/sedona-functions/benches/native-functions.rs b/rust/sedona-functions/benches/native-functions.rs
index 8a89aa9..8061130 100644
--- a/rust/sedona-functions/benches/native-functions.rs
+++ b/rust/sedona-functions/benches/native-functions.rs
@@ -99,6 +99,9 @@ fn criterion_benchmark(c: &mut Criterion) {
         BenchmarkArgs::ArrayArray(Point, Point),
     );
 
+    benchmark::scalar(c, &f, "native", "st_numgeometries", Point);
+    benchmark::scalar(c, &f, "native", "st_numgeometries", MultiPoint(10));
+
     benchmark::scalar(
         c,
         &f,
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index 2c87924..ed93709 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -45,6 +45,7 @@ mod st_iscollection;
 pub mod st_isempty;
 mod st_length;
 mod st_makeline;
+mod st_numgeometries;
 mod st_perimeter;
 mod st_point;
 mod st_pointn;
diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs
index 2678a85..7069fcb 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -83,6 +83,7 @@ pub fn default_function_set() -> FunctionSet {
         crate::st_isempty::st_isempty_udf,
         crate::st_length::st_length_udf,
         crate::st_makeline::st_makeline_udf,
+        crate::st_numgeometries::st_numgeometries_udf,
         crate::st_perimeter::st_perimeter_udf,
         crate::st_point::st_geogpoint_udf,
         crate::st_point::st_point_udf,
diff --git a/rust/sedona-functions/src/st_numgeometries.rs b/rust/sedona-functions/src/st_numgeometries.rs
new file mode 100644
index 0000000..c6574f0
--- /dev/null
+++ b/rust/sedona-functions/src/st_numgeometries.rs
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::builder::UInt32Builder;
+use arrow_schema::DataType;
+use datafusion_common::error::{DataFusionError, Result};
+use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, 
Volatility};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::types::GeometryTypeId;
+use sedona_geometry::wkb_header::WkbHeader;
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+
+use crate::executor::WkbBytesExecutor;
+
+pub fn st_numgeometries_udf() -> SedonaScalarUDF {
+    SedonaScalarUDF::new(
+        "st_numgeometries",
+        vec![Arc::new(STNumGeometries {})],
+        Volatility::Immutable,
+        Some(st_numgeometries_doc()),
+    )
+}
+
+fn st_numgeometries_doc() -> Documentation {
+    Documentation::builder(
+        DOC_SECTION_OTHER,
+        "Return the number of geometries in the geometry collection",
+        "ST_NumGeometries (A: Geometry)",
+    )
+    .with_argument("geom", "geometry: Input geometry")
+    .with_sql_example("SELECT 
ST_NumGeometries(ST_GeomFromWKT('GEOMETRYCOLLECTION(POINT(0 0), LINESTRING(0 0, 
1 1))'))")
+    .build()
+}
+
+#[derive(Debug)]
+struct STNumGeometries {}
+
+impl SedonaScalarKernel for STNumGeometries {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_geometry()],
+            SedonaType::Arrow(DataType::UInt32),
+        );
+
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[datafusion_expr::ColumnarValue],
+    ) -> Result<datafusion_expr::ColumnarValue> {
+        let executor = WkbBytesExecutor::new(arg_types, args);
+        let mut builder = 
UInt32Builder::with_capacity(executor.num_iterations());
+
+        executor.execute_wkb_void(|maybe_item| {
+            match maybe_item {
+                Some(item) => {
+                    builder.append_value(invoke_scalar(item)?);
+                }
+                None => builder.append_null(),
+            }
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+fn invoke_scalar(buf: &[u8]) -> Result<u32> {
+    let header = WkbHeader::try_new(buf).map_err(|e| 
DataFusionError::External(Box::new(e)))?;
+
+    let size = header.size();
+    let is_empty = header
+        .is_empty()
+        .map_err(|e| DataFusionError::External(Box::new(e)))?;
+    if is_empty {
+        return Ok(0);
+    }
+
+    let geometry_type = header
+        .geometry_type_id()
+        .map_err(|e| DataFusionError::External(Box::new(e)))?;
+    match geometry_type {
+        // Returns 1, for these since they are non-empty
+        GeometryTypeId::Point | GeometryTypeId::LineString | 
GeometryTypeId::Polygon => Ok(1),
+        GeometryTypeId::MultiPoint
+        | GeometryTypeId::MultiLineString
+        | GeometryTypeId::MultiPolygon
+        | GeometryTypeId::GeometryCollection => Ok(size),
+        _ => sedona_internal_err!("Invalid geometry type"),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow_array::{create_array as arrow_array, ArrayRef};
+    use datafusion_common::ScalarValue;
+    use datafusion_expr::ScalarUDF;
+    use rstest::rstest;
+    use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
+    use sedona_testing::{compare::assert_array_equal, 
testers::ScalarUdfTester};
+
+    use super::*;
+
+    #[test]
+    fn udf_metadata() {
+        let udf: ScalarUDF = st_numgeometries_udf().into();
+        assert_eq!(udf.name(), "st_numgeometries");
+        assert!(udf.documentation().is_some());
+    }
+
+    #[rstest]
+    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) 
{
+        let tester = ScalarUdfTester::new(st_numgeometries_udf().into(), 
vec![sedona_type.clone()]);
+
+        tester.assert_return_type(DataType::UInt32);
+
+        let result = tester
+            .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION(POINT(0 0), 
LINESTRING(0 0, 1 1))"))
+            .unwrap();
+        tester.assert_scalar_result_equals(result, 
ScalarValue::UInt32(Some(2)));
+
+        let result = tester.invoke_wkb_scalar(None).unwrap();
+        tester.assert_scalar_result_equals(result, ScalarValue::Null);
+
+        let input_wkt = vec![
+            None,
+            Some("POINT EMPTY"),
+            Some("MULTIPOLYGON EMPTY"),
+            Some("POINT(0 0)"),
+            Some("LINESTRING(0 0, 1 1)"),
+            Some("POLYGON((0 0, 1 0, 0 1, 0 0))"),
+            Some("MULTIPOINT((0 0), (1 1))"),
+            Some("MULTILINESTRING((0 0, 0 1, 1 1, 0 0),(0 0, 1 1))"),
+            Some("MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((0 0, 1 0, 1 1, 
0 1, 0 0)))"),
+            Some("GEOMETRYCOLLECTION EMPTY"),
+            Some("GEOMETRYCOLLECTION(POINT EMPTY, LINESTRING(0 0, 1 1))"),
+            Some("GEOMETRYCOLLECTION(POINT(0 0), MULTIPOINT((0 0), (1 1)))"),
+        ];
+        let expected: ArrayRef = arrow_array!(
+            UInt32,
+            [
+                None,
+                Some(0),
+                Some(0),
+                Some(1),
+                Some(1),
+                Some(1),
+                Some(2),
+                Some(2),
+                Some(2),
+                Some(0),
+                Some(2),
+                Some(2)
+            ]
+        );
+        assert_array_equal(&tester.invoke_wkb_array(input_wkt).unwrap(), 
&expected);
+    }
+}
diff --git a/rust/sedona-geometry/src/wkb_header.rs b/rust/sedona-geometry/src/wkb_header.rs
index e5179b2..b1cdfca 100644
--- a/rust/sedona-geometry/src/wkb_header.rs
+++ b/rust/sedona-geometry/src/wkb_header.rs
@@ -140,6 +140,16 @@ impl WkbHeader {
     pub fn first_geom_dimensions(&self) -> Option<Dimensions> {
         self.first_geom_dimensions
     }
+
+    /// Returns true if this geometry is EMPTY or false otherwise
+    pub fn is_empty(&self) -> Result<bool, SedonaGeometryError> {
+        let geometry_type_id = self.geometry_type_id()?;
+        if geometry_type_id == GeometryTypeId::Point {
+            let (x, y) = self.first_xy();
+            return Ok(x.is_nan() && y.is_nan());
+        }
+        Ok(self.size == 0)
+    }
 }
 
 // A helper struct for calculating the WKBHeader
@@ -1002,4 +1012,51 @@ mod tests {
         assert_eq!(header.dimensions().unwrap(), Dimensions::Xy);
         assert_eq!(header.first_geom_dimensions().unwrap(), Dimensions::Xyzm);
     }
+
+    #[test]
+    fn is_empty() {
+        let wkb = make_wkb("POINT EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+
+        let wkb = make_wkb("POINT Z EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+
+        let wkb = make_wkb("LINESTRING EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+
+        let wkb = make_wkb("POLYGON EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+
+        let wkb = make_wkb("MULTIPOINT EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+
+        let wkb = make_wkb("MULTILINESTRING EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+
+        let wkb = make_wkb("MULTIPOLYGON EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+
+        let wkb = make_wkb("GEOMETRYCOLLECTION EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+
+        let wkb = make_wkb("GEOMETRYCOLLECTION Z EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+
+        let wkb = make_wkb("GEOMETRYCOLLECTION M EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+
+        let wkb = make_wkb("GEOMETRYCOLLECTION ZM EMPTY");
+        let header = WkbHeader::try_new(&wkb).unwrap();
+        assert!(header.is_empty().unwrap());
+    }
 }

Reply via email to