This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 6ba858b  feat(c/sedona-geos): Implement ST_IsValid using geos library (#229)
6ba858b is described below

commit 6ba858bb6ac1c28b6cb40a6e4eb4607bfcff8a67
Author: Abeeujah <[email protected]>
AuthorDate: Mon Oct 20 02:28:30 2025 +0100

    feat(c/sedona-geos): Implement ST_IsValid using geos library (#229)
---
 c/sedona-geos/benches/geos-functions.rs           |   2 +
 c/sedona-geos/src/lib.rs                          |   1 +
 c/sedona-geos/src/register.rs                     |   2 +
 c/sedona-geos/src/st_isvalid.rs                   | 125 ++++++++++++++++++++++
 python/sedonadb/tests/functions/test_functions.py |  57 ++++++++++
 5 files changed, 187 insertions(+)

diff --git a/c/sedona-geos/benches/geos-functions.rs b/c/sedona-geos/benches/geos-functions.rs
index 34382bd..d9e6ae0 100644
--- a/c/sedona-geos/benches/geos-functions.rs
+++ b/c/sedona-geos/benches/geos-functions.rs
@@ -295,6 +295,8 @@ fn criterion_benchmark(c: &mut Criterion) {
         "st_overlaps",
         ArrayScalar(Polygon(10), Polygon(500)),
     );
+    benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(10));
+    benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(500));
 }
 
 criterion_group!(benches, criterion_benchmark);
diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs
index 74d57b9..4314c4b 100644
--- a/c/sedona-geos/src/lib.rs
+++ b/c/sedona-geos/src/lib.rs
@@ -25,6 +25,7 @@ mod st_buffer;
 mod st_centroid;
 mod st_convexhull;
 mod st_dwithin;
+mod st_isvalid;
 mod st_length;
 mod st_perimeter;
 pub mod wkb_to_geos;
diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs
index f349f7e..6025b46 100644
--- a/c/sedona-geos/src/register.rs
+++ b/c/sedona-geos/src/register.rs
@@ -17,6 +17,7 @@
 use sedona_expr::scalar_udf::ScalarKernelRef;
 
 use crate::st_convexhull::st_convex_hull_impl;
+use crate::st_isvalid::st_is_valid_impl;
 use crate::{
     distance::st_distance_impl, st_area::st_area_impl, st_buffer::st_buffer_impl,
     st_centroid::st_centroid_impl, st_dwithin::st_dwithin_impl, st_length::st_length_impl,
@@ -56,5 +57,6 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
         ("st_within", st_within_impl()),
         ("st_crosses", st_crosses_impl()),
         ("st_overlaps", st_overlaps_impl()),
+        ("st_isvalid", st_is_valid_impl()),
     ]
 }
diff --git a/c/sedona-geos/src/st_isvalid.rs b/c/sedona-geos/src/st_isvalid.rs
new file mode 100644
index 0000000..3756a10
--- /dev/null
+++ b/c/sedona-geos/src/st_isvalid.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::sync::Arc;
+
+use arrow_array::builder::BooleanBuilder;
+use arrow_schema::DataType;
+use datafusion_common::Result;
+use datafusion_expr::ColumnarValue;
+use geos::Geom;
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+
+use crate::executor::GeosExecutor;
+
+/// ST_IsValid() implementation using the geos crate
+pub fn st_is_valid_impl() -> ScalarKernelRef {
+    Arc::new(STIsValid {})
+}
+
+#[derive(Debug)]
+struct STIsValid {}
+
+impl SedonaScalarKernel for STIsValid {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_geometry()],
+            SedonaType::Arrow(DataType::Boolean),
+        );
+
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = GeosExecutor::new(arg_types, args);
+        let mut builder = BooleanBuilder::with_capacity(executor.num_iterations());
+        executor.execute_wkb_void(|maybe_wkb| {
+            match maybe_wkb {
+                Some(wkb) => {
+                    builder.append_value(invoke_scalar(&wkb));
+                }
+                _ => builder.append_null(),
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+fn invoke_scalar(geos_geom: &geos::Geometry) -> bool {
+    geos_geom.is_valid()
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use arrow_array::{ArrayRef, BooleanArray};
+    use arrow_schema::DataType;
+    use datafusion_common::ScalarValue;
+    use rstest::rstest;
+    use sedona_expr::scalar_udf::SedonaScalarUDF;
+    use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
+    use sedona_testing::testers::ScalarUdfTester;
+
+    use super::*;
+
+    #[rstest]
+    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
+        let udf = SedonaScalarUDF::from_kernel("st_isvalid", st_is_valid_impl());
+        let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+        tester.assert_return_type(DataType::Boolean);
+
+        // Valid polygon
+        let result = tester
+            .invoke_scalar("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))")
+            .unwrap();
+        tester.assert_scalar_result_equals(result, true);
+
+        // Invalid polygon (self-intersecting)
+        let result = tester
+            .invoke_scalar("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))")
+            .unwrap();
+        tester.assert_scalar_result_equals(result, false);
+
+        let result = tester.invoke_scalar(ScalarValue::Null).unwrap();
+        assert!(result.is_null());
+
+        let input_wkt = vec![
+            None,
+            Some("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"),
+            Some("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))"),
+            Some("LINESTRING (0 0, 1 1)"),
+            Some("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))"),
+        ];
+
+        let expected: ArrayRef = Arc::new(BooleanArray::from(vec![
+            None,
+            Some(true),
+            Some(false),
+            Some(true),
+            Some(false),
+        ]));
+        assert_eq!(&tester.invoke_wkb_array(input_wkt).unwrap(), &expected);
+    }
+}
diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py
index 5ddd5f9..e638134 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -211,6 +211,63 @@ def test_st_centroid(eng, geom, expected):
     eng.assert_query_result(f"SELECT ST_Centroid({geom_or_null(geom)})", expected)
 
 
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom", "expected"),
+    [
+        (None, None),
+        ("POINT (0 0)", True),
+        ("POINT EMPTY", True),
+        ("LINESTRING (0 0, 1 1)", True),
+        ("LINESTRING (0 0, 1 1, 1 0, 0 1)", True),
+        (
+            "MULTILINESTRING ((0 0, 1 1), (0 0, 1 1, 1 0, 0 1))",
+            True,
+        ),
+        ("LINESTRING EMPTY", True),
+        # Invalid LineStrings
+        ("LINESTRING (0 0, 0 0)", False),  # Degenerate - both points identical
+        ("LINESTRING (0 0, 0 0, 0 0)", False),  # All points identical
+        # Invalid MultiLineStrings
+        ("MULTILINESTRING ((0 0, 0 0), (1 1, 2 2))", False),  # Degenerate component
+        (
+            "MULTILINESTRING ((0 0, 0 0), (1 1, 1 1))",
+            False,
+        ),  # Multiple degenerate components
+        ("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))", True),
+        ("POLYGON EMPTY", True),
+        ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))", True),
+        # Invalid Polygons
+        # Self-intersecting polygon (bowtie)
+        ("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))", False),
+        # Inner ring shares an edge with the outer ring
+        ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (0 0, 0 1, 1 1, 1 0, 0 0))", False),
+        # Self-intersecting polygon (figure-8)
+        ("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))", False),
+        # Inner ring touches the outer ring at a point
+        (
+            "POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 10, 1 9, 2 9, 2 10, 1 10))",
+            False,
+        ),
+        # Overlapping polygons in a multipolygon
+        (
+            "MULTIPOLYGON (((0 0, 2 0, 2 2, 0 2, 0 0)), ((1 1, 3 1, 3 3, 1 3, 1 1)))",
+            False,
+        ),
+        (
+            "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 2, 3 2, 3 3, 2 3, 2 2)))",
+            True,
+        ),
+        # Geometry collection with an invalid polygon
+        ("GEOMETRYCOLLECTION (POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0)))", False),
+        ("GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)))", True),
+    ],
+)
+def test_st_isvalid(eng, geom, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(f"SELECT ST_IsValid({geom_or_null(geom)})", expected)
+
+
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 @pytest.mark.parametrize(
     ("geom", "expected"),

Reply via email to