This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new ffaa20d feat(rust/sedona-functions): Implement native ST_ZMFlag using
WKBHeader (#260)
ffaa20d is described below
commit ffaa20d5fa52c4b6025ff3f11b54c5a644ddaefa
Author: Peter Nguyen <[email protected]>
AuthorDate: Thu Oct 30 07:17:08 2025 -0700
feat(rust/sedona-functions): Implement native ST_ZMFlag using WKBHeader
(#260)
---
benchmarks/test_functions.py | 18 ++
python/sedonadb/tests/functions/test_functions.py | 30 ++++
rust/sedona-functions/benches/native-functions.rs | 3 +
rust/sedona-functions/src/lib.rs | 1 +
rust/sedona-functions/src/register.rs | 1 +
rust/sedona-functions/src/st_zmflag.rs | 193 ++++++++++++++++++++++
6 files changed, 246 insertions(+)
diff --git a/benchmarks/test_functions.py b/benchmarks/test_functions.py
index 270d631..52537e4 100644
--- a/benchmarks/test_functions.py
+++ b/benchmarks/test_functions.py
@@ -293,3 +293,21 @@ class TestBenchFunctions(TestBenchBase):
eng.execute_and_collect(f"SELECT ST_EndPoint(geom1) from {table}")
benchmark(queries)
+
+ # Benchmark ST_ZmFlag: one run per (engine, table) combination produced by
+ # the two parametrize decorators below.
+ @pytest.mark.parametrize(
+ "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
+ )
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "collections_complex",
+ ],
+ )
+ def test_st_zmflag(self, benchmark, eng, table):
+ # Swap the parametrized engine value for whatever self._get_eng returns.
+ eng = self._get_eng(eng)
+
+ # The callable handed to `benchmark`: executes the ST_ZmFlag query over
+ # the `geom1` column of the current table and collects the result.
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_ZmFlag(geom1) from {table}")
+
+ benchmark(queries)
diff --git a/python/sedonadb/tests/functions/test_functions.py
b/python/sedonadb/tests/functions/test_functions.py
index a43c701..c9a31b7 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -1694,3 +1694,33 @@ def test_st_simplifypreservetopology(eng, geom,
tolerance, expected):
f"SELECT ST_SimplifyPreserveTopology({geom_or_null(geom)},
{val_or_null(tolerance)})",
expected,
)
+
+
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+ ("geom", "expected"),
+ [
+ (None, None),
+ ("POINT EMPTY", 0),
+ ("POINT Z EMPTY", 2),
+ ("POINT M EMPTY", 1),
+ ("POINT ZM EMPTY", 3),
+ ("POINT Z (0 0 0)", 2),
+ ("POINT M (0 0 0)", 1),
+ ("POINT ZM (0 0 0 0)", 3),
+ ("LINESTRING EMPTY", 0),
+ ("LINESTRING Z EMPTY", 2),
+ ("LINESTRING Z (0 0 0, 1 1 1)", 2),
+ ("POLYGON EMPTY", 0),
+ ("MULTIPOINT ((0 0), (1 1))", 0),
+ ("MULTIPOINT Z ((0 0 0))", 2),
+ ("MULTIPOINT ZM ((0 0 0 0))", 3),
+ ("GEOMETRYCOLLECTION EMPTY", 0),
+ ("GEOMETRYCOLLECTION (POINT Z (0 0 0))", 2),
+ ("GEOMETRYCOLLECTION Z (POINT Z (0 0 0))", 2),
+ ("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT Z (0 0 0)))", 2),
+ ],
+)
+def test_st_zmflag(eng, geom, expected):
+ eng = eng.create_or_skip()
+ eng.assert_query_result(f"SELECT ST_ZmFlag({geom_or_null(geom)})",
expected)
diff --git a/rust/sedona-functions/benches/native-functions.rs
b/rust/sedona-functions/benches/native-functions.rs
index af89a46..adf9101 100644
--- a/rust/sedona-functions/benches/native-functions.rs
+++ b/rust/sedona-functions/benches/native-functions.rs
@@ -157,6 +157,9 @@ fn criterion_benchmark(c: &mut Criterion) {
benchmark::scalar(c, &f, "native", "st_mmin", LineString(10));
benchmark::scalar(c, &f, "native", "st_mmax", LineString(10));
+ benchmark::scalar(c, &f, "native", "st_zmflag", Point);
+ benchmark::scalar(c, &f, "native", "st_zmflag", LineString(10));
+
benchmark::scalar(
c,
&f,
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index da35789..8c53a99 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -55,3 +55,4 @@ mod st_transform;
pub mod st_union_aggr;
mod st_xyzm;
mod st_xyzm_minmax;
+mod st_zmflag;
diff --git a/rust/sedona-functions/src/register.rs
b/rust/sedona-functions/src/register.rs
index a5d0f69..e90f941 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -108,6 +108,7 @@ pub fn default_function_set() -> FunctionSet {
crate::st_xyzm_minmax::st_mmin_udf,
crate::st_xyzm_minmax::st_mmax_udf,
crate::st_isclosed::st_isclosed_udf,
+ crate::st_zmflag::st_zmflag_udf,
);
register_aggregate_udfs!(
diff --git a/rust/sedona-functions/src/st_zmflag.rs
b/rust/sedona-functions/src/st_zmflag.rs
new file mode 100644
index 0000000..f0633d9
--- /dev/null
+++ b/rust/sedona-functions/src/st_zmflag.rs
@@ -0,0 +1,193 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::sync::Arc;
+
+use crate::executor::WkbBytesExecutor;
+use arrow_array::builder::Int8Builder;
+use arrow_schema::DataType;
+use datafusion_common::{error::Result, DataFusionError};
+use datafusion_expr::{
+ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation,
Volatility,
+};
+use geo_traits::Dimensions;
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::wkb_header::WkbHeader;
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+
+/// Creates the `st_zmflag` scalar UDF.
+///
+/// The UDF is registered under the name `"st_zmflag"`, is declared
+/// immutable, carries the documentation built by `st_zmflag_doc()` and is
+/// backed by a single `STZmFlag` kernel.
+pub fn st_zmflag_udf() -> SedonaScalarUDF {
+    let documentation = Some(st_zmflag_doc());
+
+    SedonaScalarUDF::new(
+        "st_zmflag",
+        vec![Arc::new(STZmFlag {})],
+        Volatility::Immutable,
+        documentation,
+    )
+}
+
+/// Builds the user-facing documentation entry for `ST_ZmFlag`: a one-line
+/// description of the returned codes, the call syntax, the single geometry
+/// argument and a SQL example.
+fn st_zmflag_doc() -> Documentation {
+    Documentation::builder(
+        DOC_SECTION_OTHER,
+        "Returns a code indicating the ZM coordinate dimension of a geometry. Values are 0 for 2D, 1 for 3D-M, 2 for 3D-Z, and 3 for 4D.".to_string(),
+        "ST_ZmFlag (A: Geometry)".to_string(),
+    )
+    .with_argument("geom", "geometry: Input geometry")
+    .with_sql_example("SELECT ST_ZmFlag(ST_GeomFromWKT('POLYGON ((0 0, 1 0, 0 1, 0 0))'))")
+    .build()
+}
+
+/// Kernel behind `ST_ZmFlag`: maps every geometry in the input to an `Int8`
+/// code computed by `invoke_scalar`.
+#[derive(Debug)]
+struct STZmFlag {}
+
+impl SedonaScalarKernel for STZmFlag {
+    /// Matches a single geometry argument against an `Int8` output type and
+    /// returns whatever the matcher reports for `args`.
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let output_type = SedonaType::Arrow(DataType::Int8);
+        ArgMatcher::new(vec![ArgMatcher::is_geometry()], output_type).match_args(args)
+    }
+
+    /// Evaluates the kernel over a batch of WKB values.
+    ///
+    /// Each non-null item is converted with `invoke_scalar` (any error from
+    /// it aborts the batch); null items produce a null output slot.
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = WkbBytesExecutor::new(arg_types, args);
+        // Reserve one output slot per iteration reported by the executor.
+        let mut flags = Int8Builder::with_capacity(executor.num_iterations());
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            if let Some(wkb) = maybe_wkb {
+                let flag = invoke_scalar(wkb)?;
+                flags.append_value(flag);
+            } else {
+                flags.append_null();
+            }
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(flags.finish()))
+    }
+}
+
+/// Computes the ZM flag for a single WKB-encoded geometry.
+///
+/// Returns `0` for XY, `1` for XYM, `2` for XYZ and `3` for XYZM.
+///
+/// # Errors
+///
+/// Returns `DataFusionError::External` when the WKB header cannot be parsed
+/// or its top-level dimensions cannot be read, and an internal error when
+/// the resulting dimension is none of the four handled variants.
+fn invoke_scalar(buf: &[u8]) -> Result<i8> {
+    let header = WkbHeader::try_new(buf).map_err(|e| DataFusionError::External(Box::new(e)))?;
+    // Read the top-level dimensions first so a malformed header is reported
+    // even when a first-geometry dimension is available.
+    let top_level_dimensions = header
+        .dimensions()
+        .map_err(|e| DataFusionError::External(Box::new(e)))?;
+
+    // Prefer the dimension of the first nested geometry for cases where it
+    // differs from the top level, e.g. GEOMETRYCOLLECTION (POINT Z (1 2 3));
+    // fall back to the top-level dimension when the header reports none.
+    let dimensions = header
+        .first_geom_dimensions()
+        .unwrap_or(top_level_dimensions);
+
+    match dimensions {
+        Dimensions::Xy => Ok(0),
+        Dimensions::Xym => Ok(1),
+        Dimensions::Xyz => Ok(2),
+        Dimensions::Xyzm => Ok(3),
+        _ => sedona_internal_err!("Invalid dimensions: {:?}", dimensions),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use datafusion_common::ScalarValue;
+    use datafusion_expr::ScalarUDF;
+    use rstest::rstest;
+    use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
+    use sedona_testing::{
+        fixtures::MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB, testers::ScalarUdfTester,
+    };
+
+    use super::*;
+
+    /// The UDF exposes the expected name and carries documentation.
+    #[test]
+    fn udf_metadata() {
+        let udf: ScalarUDF = st_zmflag_udf().into();
+        assert_eq!(udf.name(), "st_zmflag");
+        assert!(udf.documentation().is_some());
+    }
+
+    /// Exercises the UDF for both WKB storage types across plain points,
+    /// null input and geometry collections.
+    #[rstest]
+    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
+        // `sedona_type` is not used again, so it is moved rather than cloned.
+        let tester = ScalarUdfTester::new(st_zmflag_udf().into(), vec![sedona_type]);
+
+        tester.assert_return_type(DataType::Int8);
+
+        let result = tester.invoke_scalar("POINT ZM (1 2 3 4)").unwrap();
+        tester.assert_scalar_result_equals(result, 3);
+
+        let result = tester.invoke_scalar("POINT (1 2)").unwrap();
+        tester.assert_scalar_result_equals(result, 0);
+
+        let result = tester.invoke_scalar("POINT Z (1 2 3)").unwrap();
+        tester.assert_scalar_result_equals(result, 2);
+
+        let result = tester.invoke_wkb_scalar(None).unwrap();
+        tester.assert_scalar_result_equals(result, ScalarValue::Null);
+
+        // Z-dimension specified only in the nested geometry, but not the geom collection level
+        let result = tester
+            .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT Z (1 2 3))"))
+            .unwrap();
+        tester.assert_scalar_result_equals(result, 2);
+
+        // Z-dimension specified on both the geom collection and nested geometry level
+        let result = tester
+            .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION Z (POINT Z (1 2 3))"))
+            .unwrap();
+        tester.assert_scalar_result_equals(result, 2);
+
+        // M-dimension specified only in the nested geometry
+        let result = tester
+            .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION (POINT M (1 2 3))"))
+            .unwrap();
+        tester.assert_scalar_result_equals(result, 1);
+
+        let result = tester
+            .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION EMPTY"))
+            .unwrap();
+        tester.assert_scalar_result_equals(result, 0);
+
+        // Empty geometry collections with Z or M dimensions
+        let result = tester
+            .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION Z EMPTY"))
+            .unwrap();
+        tester.assert_scalar_result_equals(result, 2);
+
+        let result = tester
+            .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION M EMPTY"))
+            .unwrap();
+        tester.assert_scalar_result_equals(result, 1);
+    }
+
+    /// The `MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB` fixture is expected to
+    /// report the Z flag (2).
+    #[test]
+    fn multipoint_with_inferred_z_dimension() {
+        let tester = ScalarUdfTester::new(st_zmflag_udf().into(), vec![WKB_GEOMETRY]);
+
+        let scalar = ScalarValue::Binary(Some(MULTIPOINT_WITH_INFERRED_Z_DIMENSION_WKB.to_vec()));
+        // `scalar` is only needed for this one call, so it is moved rather than cloned.
+        assert_eq!(
+            tester.invoke_scalar(scalar).unwrap(),
+            ScalarValue::Int8(Some(2))
+        );
+    }
+}