This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 6ba858b feat(c/sedona-geos): Implement ST_IsValid using geos library (#229)
6ba858b is described below
commit 6ba858bb6ac1c28b6cb40a6e4eb4607bfcff8a67
Author: Abeeujah <[email protected]>
AuthorDate: Mon Oct 20 02:28:30 2025 +0100
feat(c/sedona-geos): Implement ST_IsValid using geos library (#229)
---
c/sedona-geos/benches/geos-functions.rs | 2 +
c/sedona-geos/src/lib.rs | 1 +
c/sedona-geos/src/register.rs | 2 +
c/sedona-geos/src/st_isvalid.rs | 125 ++++++++++++++++++++++
python/sedonadb/tests/functions/test_functions.py | 57 ++++++++++
5 files changed, 187 insertions(+)
diff --git a/c/sedona-geos/benches/geos-functions.rs b/c/sedona-geos/benches/geos-functions.rs
index 34382bd..d9e6ae0 100644
--- a/c/sedona-geos/benches/geos-functions.rs
+++ b/c/sedona-geos/benches/geos-functions.rs
@@ -295,6 +295,8 @@ fn criterion_benchmark(c: &mut Criterion) {
"st_overlaps",
ArrayScalar(Polygon(10), Polygon(500)),
);
+ benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(10));
+ benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(500));
}
criterion_group!(benches, criterion_benchmark);
diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs
index 74d57b9..4314c4b 100644
--- a/c/sedona-geos/src/lib.rs
+++ b/c/sedona-geos/src/lib.rs
@@ -25,6 +25,7 @@ mod st_buffer;
mod st_centroid;
mod st_convexhull;
mod st_dwithin;
+mod st_isvalid;
mod st_length;
mod st_perimeter;
pub mod wkb_to_geos;
diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs
index f349f7e..6025b46 100644
--- a/c/sedona-geos/src/register.rs
+++ b/c/sedona-geos/src/register.rs
@@ -17,6 +17,7 @@
use sedona_expr::scalar_udf::ScalarKernelRef;
use crate::st_convexhull::st_convex_hull_impl;
+use crate::st_isvalid::st_is_valid_impl;
use crate::{
distance::st_distance_impl, st_area::st_area_impl, st_buffer::st_buffer_impl,
st_centroid::st_centroid_impl, st_dwithin::st_dwithin_impl, st_length::st_length_impl,
@@ -56,5 +57,6 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
("st_within", st_within_impl()),
("st_crosses", st_crosses_impl()),
("st_overlaps", st_overlaps_impl()),
+ ("st_isvalid", st_is_valid_impl()),
]
}
diff --git a/c/sedona-geos/src/st_isvalid.rs b/c/sedona-geos/src/st_isvalid.rs
new file mode 100644
index 0000000..3756a10
--- /dev/null
+++ b/c/sedona-geos/src/st_isvalid.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::sync::Arc;
+
+use arrow_array::builder::BooleanBuilder;
+use arrow_schema::DataType;
+use datafusion_common::Result;
+use datafusion_expr::ColumnarValue;
+use geos::Geom;
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+
+use crate::executor::GeosExecutor;
+
+/// ST_IsValid() implementation using the geos crate
+pub fn st_is_valid_impl() -> ScalarKernelRef {
+ Arc::new(STIsValid {})
+}
+
+#[derive(Debug)]
+struct STIsValid {}
+
+impl SedonaScalarKernel for STIsValid {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(
+ vec![ArgMatcher::is_geometry()],
+ SedonaType::Arrow(DataType::Boolean),
+ );
+
+ matcher.match_args(args)
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ let executor = GeosExecutor::new(arg_types, args);
+ let mut builder = BooleanBuilder::with_capacity(executor.num_iterations());
+ executor.execute_wkb_void(|maybe_wkb| {
+ match maybe_wkb {
+ Some(wkb) => {
+ builder.append_value(invoke_scalar(&wkb));
+ }
+ _ => builder.append_null(),
+ }
+
+ Ok(())
+ })?;
+
+ executor.finish(Arc::new(builder.finish()))
+ }
+}
+
+fn invoke_scalar(geos_geom: &geos::Geometry) -> bool {
+ geos_geom.is_valid()
+}
+
+#[cfg(test)]
+mod tests {
+ use std::sync::Arc;
+
+ use arrow_array::{ArrayRef, BooleanArray};
+ use arrow_schema::DataType;
+ use datafusion_common::ScalarValue;
+ use rstest::rstest;
+ use sedona_expr::scalar_udf::SedonaScalarUDF;
+ use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
+ use sedona_testing::testers::ScalarUdfTester;
+
+ use super::*;
+
+ #[rstest]
+ fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
+ let udf = SedonaScalarUDF::from_kernel("st_isvalid", st_is_valid_impl());
+ let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+ tester.assert_return_type(DataType::Boolean);
+
+ // Valid polygon
+ let result = tester
+ .invoke_scalar("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))")
+ .unwrap();
+ tester.assert_scalar_result_equals(result, true);
+
+ // Invalid polygon (self-intersecting)
+ let result = tester
+ .invoke_scalar("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))")
+ .unwrap();
+ tester.assert_scalar_result_equals(result, false);
+
+ let result = tester.invoke_scalar(ScalarValue::Null).unwrap();
+ assert!(result.is_null());
+
+ let input_wkt = vec![
+ None,
+ Some("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"),
+ Some("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))"),
+ Some("LINESTRING (0 0, 1 1)"),
+ Some("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))"),
+ ];
+
+ let expected: ArrayRef = Arc::new(BooleanArray::from(vec![
+ None,
+ Some(true),
+ Some(false),
+ Some(true),
+ Some(false),
+ ]));
+ assert_eq!(&tester.invoke_wkb_array(input_wkt).unwrap(), &expected);
+ }
+}
diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py
index 5ddd5f9..e638134 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -211,6 +211,63 @@ def test_st_centroid(eng, geom, expected):
eng.assert_query_result(f"SELECT ST_Centroid({geom_or_null(geom)})", expected)
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+ ("geom", "expected"),
+ [
+ (None, None),
+ ("POINT (0 0)", True),
+ ("POINT EMPTY", True),
+ ("LINESTRING (0 0, 1 1)", True),
+ ("LINESTRING (0 0, 1 1, 1 0, 0 1)", True),
+ (
+ "MULTILINESTRING ((0 0, 1 1), (0 0, 1 1, 1 0, 0 1))",
+ True,
+ ),
+ ("LINESTRING EMPTY", True),
+ # Invalid LineStrings
+ ("LINESTRING (0 0, 0 0)", False), # Degenerate - both points identical
+ ("LINESTRING (0 0, 0 0, 0 0)", False), # All points identical
+ # Invalid MultiLineStrings
+ ("MULTILINESTRING ((0 0, 0 0), (1 1, 2 2))", False), # Degenerate component
+ (
+ "MULTILINESTRING ((0 0, 0 0), (1 1, 1 1))",
+ False,
+ ), # Multiple degenerate components
+ ("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))", True),
+ ("POLYGON EMPTY", True),
+ ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))", True),
+ # Invalid Polygons
+ # Self-intersecting polygon (bowtie)
+ ("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))", False),
+ # Inner ring shares an edge with the outer ring
+ ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (0 0, 0 1, 1 1, 1 0, 0 0))", False),
+ # Self-intersecting polygon (figure-8)
+ ("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))", False),
+ # Inner ring touches the outer ring at a point
+ (
+ "POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 10, 1 9, 2 9, 2 10, 1 10))",
+ False,
+ ),
+ # Overlapping polygons in a multipolygon
+ (
+ "MULTIPOLYGON (((0 0, 2 0, 2 2, 0 2, 0 0)), ((1 1, 3 1, 3 3, 1 3, 1 1)))",
+ False,
+ ),
+ (
+ "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 2, 3 2, 3 3, 2 3, 2 2)))",
+ True,
+ ),
+ # Geometry collection with an invalid polygon
+ ("GEOMETRYCOLLECTION (POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0)))", False),
+ ("GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)))", True),
+ ],
+)
+def test_st_isvalid(eng, geom, expected):
+ eng = eng.create_or_skip()
+ eng.assert_query_result(f"SELECT ST_IsValid({geom_or_null(geom)})", expected)
+
+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "expected"),