This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 9e3365f  feat(c/sedona-geos): Implement ST_IsValidReason using geos library (#230)
9e3365f is described below

commit 9e3365f60cba629536c7ffda8764c6eabf441fde
Author: Abeeujah <[email protected]>
AuthorDate: Wed Oct 22 04:00:12 2025 +0100

    feat(c/sedona-geos): Implement ST_IsValidReason using geos library (#230)
    
    Co-authored-by: Dewey Dunnington <[email protected]>
---
 c/sedona-geos/benches/geos-functions.rs           |  88 +++++++-------
 c/sedona-geos/src/lib.rs                          |   1 +
 c/sedona-geos/src/register.rs                     |   6 +-
 c/sedona-geos/src/st_isvalidreason.rs             | 134 ++++++++++++++++++++++
 python/sedonadb/tests/functions/test_functions.py |  21 ++++
 5 files changed, 205 insertions(+), 45 deletions(-)

diff --git a/c/sedona-geos/benches/geos-functions.rs b/c/sedona-geos/benches/geos-functions.rs
index d9e6ae0..2a9eaf1 100644
--- a/c/sedona-geos/benches/geos-functions.rs
+++ b/c/sedona-geos/benches/geos-functions.rs
@@ -46,35 +46,35 @@ fn criterion_benchmark(c: &mut Criterion) {
     benchmark::scalar(c, &f, "geos", "st_centroid", Polygon(10));
     benchmark::scalar(c, &f, "geos", "st_centroid", Polygon(500));
 
-    benchmark::scalar(c, &f, "geos", "st_convexhull", MultiPoint(10));
-
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_distance",
+        "st_contains",
         ArrayScalar(Polygon(10), Polygon(10)),
     );
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_distance",
+        "st_contains",
         ArrayScalar(Polygon(10), Polygon(500)),
     );
 
+    benchmark::scalar(c, &f, "geos", "st_convexhull", MultiPoint(10));
+
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_contains",
+        "st_coveredby",
         ArrayScalar(Polygon(10), Polygon(10)),
     );
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_contains",
+        "st_coveredby",
         ArrayScalar(Polygon(10), Polygon(500)),
     );
 
@@ -82,14 +82,14 @@ fn criterion_benchmark(c: &mut Criterion) {
         c,
         &f,
         "geos",
-        "st_coveredby",
+        "st_covers",
         ArrayScalar(Polygon(10), Polygon(10)),
     );
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_coveredby",
+        "st_covers",
         ArrayScalar(Polygon(10), Polygon(500)),
     );
 
@@ -97,14 +97,14 @@ fn criterion_benchmark(c: &mut Criterion) {
         c,
         &f,
         "geos",
-        "st_covers",
+        "st_crosses",
         ArrayScalar(Polygon(10), Polygon(10)),
     );
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_covers",
+        "st_crosses",
         ArrayScalar(Polygon(10), Polygon(500)),
     );
 
@@ -115,7 +115,6 @@ fn criterion_benchmark(c: &mut Criterion) {
         "st_difference",
         ArrayScalar(Polygon(10), Polygon(10)),
     );
-
     benchmark::scalar(
         c,
         &f,
@@ -139,6 +138,21 @@ fn criterion_benchmark(c: &mut Criterion) {
         ArrayScalar(Polygon(10), Polygon(500)),
     );
 
+    benchmark::scalar(
+        c,
+        &f,
+        "geos",
+        "st_distance",
+        ArrayScalar(Polygon(10), Polygon(10)),
+    );
+    benchmark::scalar(
+        c,
+        &f,
+        "geos",
+        "st_distance",
+        ArrayScalar(Polygon(10), Polygon(500)),
+    );
+
     benchmark::scalar(
         c,
         &f,
@@ -146,7 +160,6 @@ fn criterion_benchmark(c: &mut Criterion) {
         "st_dwithin",
         ArrayArrayScalar(Polygon(10), Polygon(10), Float64(1.0, 2.0)),
     );
-
     benchmark::scalar(
         c,
         &f,
@@ -200,56 +213,45 @@ fn criterion_benchmark(c: &mut Criterion) {
         ArrayScalar(Polygon(10), Polygon(500)),
     );
 
+    benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(10));
+    benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(500));
+
+    benchmark::scalar(c, &f, "geos", "st_isvalidreason", Polygon(10));
+    benchmark::scalar(c, &f, "geos", "st_isvalidreason", Polygon(500));
+
     benchmark::scalar(c, &f, "geos", "st_length", LineString(10));
     benchmark::scalar(c, &f, "geos", "st_length", LineString(500));
 
-    benchmark::scalar(c, &f, "geos", "st_perimeter", Polygon(10));
-    benchmark::scalar(c, &f, "geos", "st_perimeter", Polygon(500));
-
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_symdifference",
+        "st_overlaps",
         ArrayScalar(Polygon(10), Polygon(10)),
     );
-
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_symdifference",
+        "st_overlaps",
         ArrayScalar(Polygon(10), Polygon(500)),
     );
 
-    benchmark::scalar(
-        c,
-        &f,
-        "geos",
-        "st_touches",
-        ArrayScalar(Polygon(10), Polygon(10)),
-    );
-    benchmark::scalar(
-        c,
-        &f,
-        "geos",
-        "st_touches",
-        ArrayScalar(Polygon(10), Polygon(500)),
-    );
+    benchmark::scalar(c, &f, "geos", "st_perimeter", Polygon(10));
+    benchmark::scalar(c, &f, "geos", "st_perimeter", Polygon(500));
 
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_union",
+        "st_symdifference",
         ArrayScalar(Polygon(10), Polygon(10)),
     );
-
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_union",
+        "st_symdifference",
         ArrayScalar(Polygon(10), Polygon(500)),
     );
 
@@ -257,46 +259,46 @@ fn criterion_benchmark(c: &mut Criterion) {
         c,
         &f,
         "geos",
-        "st_within",
+        "st_touches",
         ArrayScalar(Polygon(10), Polygon(10)),
     );
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_within",
+        "st_touches",
         ArrayScalar(Polygon(10), Polygon(500)),
     );
+
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_crosses",
+        "st_union",
         ArrayScalar(Polygon(10), Polygon(10)),
     );
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_crosses",
+        "st_union",
         ArrayScalar(Polygon(10), Polygon(500)),
     );
+
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_overlaps",
+        "st_within",
         ArrayScalar(Polygon(10), Polygon(10)),
     );
     benchmark::scalar(
         c,
         &f,
         "geos",
-        "st_overlaps",
+        "st_within",
         ArrayScalar(Polygon(10), Polygon(500)),
     );
-    benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(10));
-    benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(500));
 }
 
 criterion_group!(benches, criterion_benchmark);
diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs
index b27bf98..037d8e9 100644
--- a/c/sedona-geos/src/lib.rs
+++ b/c/sedona-geos/src/lib.rs
@@ -27,6 +27,7 @@ mod st_convexhull;
 mod st_dwithin;
 mod st_isring;
 mod st_isvalid;
+mod st_isvalidreason;
 mod st_length;
 mod st_perimeter;
 pub mod wkb_to_geos;
diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs
index 377891d..9a819b2 100644
--- a/c/sedona-geos/src/register.rs
+++ b/c/sedona-geos/src/register.rs
@@ -19,7 +19,8 @@ use sedona_expr::scalar_udf::ScalarKernelRef;
 use crate::{
     distance::st_distance_impl, st_area::st_area_impl, st_buffer::st_buffer_impl,
     st_centroid::st_centroid_impl, st_convexhull::st_convex_hull_impl, st_dwithin::st_dwithin_impl,
-    st_isring::st_is_ring_impl, st_isvalid::st_is_valid_impl, st_length::st_length_impl,
+    st_isring::st_is_ring_impl, st_isvalid::st_is_valid_impl,
+    st_isvalidreason::st_is_valid_reason_impl, st_length::st_length_impl,
     st_perimeter::st_perimeter_impl,
 };
 
@@ -50,6 +51,8 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
         ("st_intersection", st_intersection_impl()),
         ("st_intersects", st_intersects_impl()),
         ("st_isring", st_is_ring_impl()),
+        ("st_isvalid", st_is_valid_impl()),
+        ("st_isvalidreason", st_is_valid_reason_impl()),
         ("st_length", st_length_impl()),
         ("st_overlaps", st_overlaps_impl()),
         ("st_perimeter", st_perimeter_impl()),
@@ -59,6 +62,5 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
         ("st_within", st_within_impl()),
         ("st_crosses", st_crosses_impl()),
         ("st_overlaps", st_overlaps_impl()),
-        ("st_isvalid", st_is_valid_impl()),
     ]
 }
diff --git a/c/sedona-geos/src/st_isvalidreason.rs b/c/sedona-geos/src/st_isvalidreason.rs
new file mode 100644
index 0000000..d032040
--- /dev/null
+++ b/c/sedona-geos/src/st_isvalidreason.rs
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::builder::StringBuilder;
+use arrow_schema::DataType;
+use datafusion_common::{DataFusionError, Result};
+use geos::Geom;
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+
+use crate::executor::GeosExecutor;
+
+/// ST_IsValidReason() implementation using the geos crate
+pub fn st_is_valid_reason_impl() -> ScalarKernelRef {
+    Arc::new(STIsValidReason {})
+}
+
+#[derive(Debug)]
+struct STIsValidReason {}
+
+impl SedonaScalarKernel for STIsValidReason {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_geometry()],
+            SedonaType::Arrow(DataType::Utf8),
+        );
+
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[datafusion_expr::ColumnarValue],
+    ) -> Result<datafusion_expr::ColumnarValue> {
+        let executor = GeosExecutor::new(arg_types, args);
+        let mut builder = StringBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+        );
+        executor.execute_wkb_void(|maybe_wkb| {
+            match maybe_wkb {
+                Some(wkb) => {
+                    builder.append_value(invoke_scalar(&wkb)?);
+                }
+                _ => builder.append_null(),
+            }
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+fn invoke_scalar(geos_geom: &geos::Geometry) -> Result<String> {
+    geos_geom
+        .is_valid_reason()
+        .map_err(|e| DataFusionError::Execution(format!("Invalid Geometry: {e}")))
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow_array::StringArray;
+    use arrow_schema::DataType;
+    use datafusion_common::ScalarValue;
+    use rstest::rstest;
+    use sedona_expr::scalar_udf::SedonaScalarUDF;
+    use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
+    use sedona_testing::testers::ScalarUdfTester;
+
+    use super::*;
+
+    #[rstest]
+    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
+        use arrow_array::Array;
+
+        let udf = SedonaScalarUDF::from_kernel("st_isvalidreason", st_is_valid_reason_impl());
+        let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+        tester.assert_return_type(DataType::Utf8);
+
+        // Test with a valid geometry
+        let result = tester
+            .invoke_scalar("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))")
+            .unwrap();
+        tester.assert_scalar_result_equals(result, "Valid Geometry");
+
+        // Test with an invalid geometry (self-intersection)
+        let result = tester
+            .invoke_scalar("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))")
+            .unwrap();
+        if let ScalarValue::Utf8(Some(reason)) = result {
+            assert!(reason.starts_with("Self-intersection"));
+        } else {
+            panic!("Expected a reason string for invalid geometry");
+        }
+
+        let result = tester.invoke_scalar(ScalarValue::Null).unwrap();
+        assert!(result.is_null());
+
+        let input_wkt = vec![
+            None,
+            Some("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"),
+            Some("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))"),
+            Some("LINESTRING (0 0, 1 1)"),
+            Some("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))"),
+        ];
+
+        let result_array = tester.invoke_wkb_array(input_wkt).unwrap();
+        let result_array = result_array.as_any().downcast_ref::<StringArray>().unwrap();
+
+        assert!(result_array.is_null(0));
+        assert_eq!(result_array.value(1), "Valid Geometry");
+        assert!(result_array.value(2).starts_with("Self-intersection"));
+        assert_eq!(result_array.value(3), "Valid Geometry");
+        assert!(result_array.value(4).starts_with("Ring Self-intersection"),);
+    }
+}
diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py
index dc2f284..67cb81c 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -1220,3 +1220,24 @@ def test_st_mmin(eng, geom, expected):
 def test_st_mmax(eng, geom, expected):
     eng = eng.create_or_skip()
     eng.assert_query_result(f"SELECT ST_MMax({geom_or_null(geom)})", expected)
+
+
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom", "expected"),
+    [
+        (None, None),
+        ("POINT (0 0)", "Valid Geometry"),
+        ("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))", "Valid Geometry"),
+        ("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))", "Self-intersection%"),
+        ("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))", "Ring Self-intersection%"),
+    ],
+)
+def test_st_isvalidreason(eng, geom, expected):
+    eng = eng.create_or_skip()
+    if expected is not None and "%" in str(expected):
+        query = f"SELECT ST_IsValidReason({geom_or_null(geom)}) LIKE '{expected}'"
+        eng.assert_query_result(query, True)
+    else:
+        query = f"SELECT ST_IsValidReason({geom_or_null(geom)})"
+        eng.assert_query_result(query, expected)

Reply via email to