This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new e8f4c5e3 feat(c/sedona-geos): Implement ST_MakeValid() (#312)
e8f4c5e3 is described below

commit e8f4c5e338c61a52dd2994c11943f3f0b546536d
Author: Abeeujah <[email protected]>
AuthorDate: Mon Nov 17 04:29:53 2025 +0100

    feat(c/sedona-geos): Implement ST_MakeValid() (#312)
---
 c/sedona-geos/src/lib.rs                          |   1 +
 c/sedona-geos/src/register.rs                     |   2 +
 c/sedona-geos/src/st_makevalid.rs                 | 161 ++++++++++++++++++++++
 python/sedonadb/tests/functions/test_functions.py |  69 ++++++++++
 4 files changed, 233 insertions(+)

diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs
index 94667123..29ba87d3 100644
--- a/c/sedona-geos/src/lib.rs
+++ b/c/sedona-geos/src/lib.rs
@@ -31,6 +31,7 @@ mod st_issimple;
 mod st_isvalid;
 mod st_isvalidreason;
 mod st_length;
+mod st_makevalid;
 mod st_perimeter;
 mod st_polygonize_agg;
 mod st_reverse;
diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs
index e077c992..36c6cf6a 100644
--- a/c/sedona-geos/src/register.rs
+++ b/c/sedona-geos/src/register.rs
@@ -30,6 +30,7 @@ use crate::{
     st_isvalid::st_is_valid_impl,
     st_isvalidreason::st_is_valid_reason_impl,
     st_length::st_length_impl,
+    st_makevalid::st_make_valid_impl,
     st_perimeter::st_perimeter_impl,
     st_polygonize_agg::st_polygonize_agg_impl,
     st_reverse::st_reverse_impl,
@@ -72,6 +73,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, 
ScalarKernelRef)> {
         ("st_isvalid", st_is_valid_impl()),
         ("st_isvalidreason", st_is_valid_reason_impl()),
         ("st_length", st_length_impl()),
+        ("st_makevalid", st_make_valid_impl()),
         ("st_overlaps", st_overlaps_impl()),
         ("st_perimeter", st_perimeter_impl()),
         ("st_reverse", st_reverse_impl()),
diff --git a/c/sedona-geos/src/st_makevalid.rs 
b/c/sedona-geos/src/st_makevalid.rs
new file mode 100644
index 00000000..74c63a2b
--- /dev/null
+++ b/c/sedona-geos/src/st_makevalid.rs
@@ -0,0 +1,161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::builder::BinaryBuilder;
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::ColumnarValue;
+use geos::Geom;
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
+use sedona_schema::{
+    datatypes::{SedonaType, WKB_GEOMETRY},
+    matchers::ArgMatcher,
+};
+
+use crate::executor::GeosExecutor;
+
+/// ST_MakeValid() implementation using the geos crate
+pub fn st_make_valid_impl() -> ScalarKernelRef {
+    Arc::new(STMakeValid {})
+}
+
+#[derive(Debug)]
+struct STMakeValid {}
+
+impl SedonaScalarKernel for STMakeValid {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], 
WKB_GEOMETRY);
+
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = GeosExecutor::new(arg_types, args);
+        let mut builder = BinaryBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+        );
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            match maybe_wkb {
+                Some(wkb) => {
+                    invoke_scalar(&wkb, &mut builder)?;
+                    builder.append_value([]);
+                }
+                _ => builder.append_null(),
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+fn invoke_scalar(geos_geom: &geos::Geometry, writer: &mut impl std::io::Write) 
-> Result<()> {
+    let geometry = geos_geom
+        .make_valid()
+        .map_err(|e| DataFusionError::Execution(format!("Failed to make 
geometry valid: {e}")))?;
+
+    let wkb = geometry
+        .to_wkb()
+        .map_err(|e| DataFusionError::Execution(format!("Failed to convert to 
wkb: {e}")))?;
+
+    writer.write_all(wkb.as_ref())?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use rstest::rstest;
+    use sedona_expr::scalar_udf::SedonaScalarUDF;
+    use sedona_schema::datatypes::WKB_VIEW_GEOMETRY;
+    use sedona_testing::{create::create_array, testers::ScalarUdfTester};
+
+    use super::*;
+
+    #[rstest]
+    fn st_make_valid_udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] 
sedona_type: SedonaType) {
+        let udf = SedonaScalarUDF::from_kernel("st_makevalid", 
st_make_valid_impl());
+        let tester = ScalarUdfTester::new(udf.into(), 
vec![sedona_type.clone()]);
+        tester.assert_return_type(WKB_GEOMETRY);
+
+        let input_wkt = vec![
+            Some("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"), // Already valid 
polygon should remain unchanged
+            Some("POLYGON ((0 0, 2 2, 2 0, 0 2, 0 0))"), // Self-intersecting 
polygon (bowtie) should be fixed
+            Some("POLYGON ((0 0, 0 3, 3 3, 3 0, 0 0), (1 1, 1 2, 2 2, 2 1, 1 
1))"), // Polygon with incorrect ring orientation should be fixed
+            Some("POLYGON ((0 0, 0 1, 0 1, 1 1, 1 0, 0 0, 0 0))"), //Polygon 
with repeated points should be cleaned
+            Some("LINESTRING (0 0, 1 1, 2 2)"), // LineString that is already 
valid
+            Some("LINESTRING (0 0, 0 0, 1 1, 1 1, 2 2)"), // LineString with 
repeated points should be simplified
+            Some("MULTIPOLYGON (((0 0, 1 1, 1 0, 0 1, 0 0)), ((2 2, 3 3, 3 2, 
2 3, 2 2)))"), // MultiPolygon with invalid components
+            Some("POINT (1 1)"), // Point geometry (always valid)
+            Some("GEOMETRYCOLLECTION (POINT (1 1), POLYGON ((0 0, 2 2, 2 0, 0 
2, 0 0)))"), // GeometryCollection with mixed valid/invalid geometries
+            Some("POLYGON EMPTY"), // Empty geometry
+            Some("POLYGON ((0 0, 3 0, 3 3, 2 1, 1 3, 0 3, 0 0))"), // Polygon 
with spike (almost self-intersecting)
+        ];
+
+        let expected = create_array(&[
+            Some("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"),
+            Some("MULTIPOLYGON(((0 2,1 1,0 0,0 2)),((2 0,1 1,2 2,2 0)))"),
+            Some("POLYGON((0 0,0 3,3 3,3 0,0 0),(1 1,1 2,2 2,2 1,1 1))"),
+            Some("POLYGON((0 0,0 1,0 1,1 1,1 0,0 0,0 0))"),
+            Some("LINESTRING (0 0, 1 1, 2 2)"),
+            Some("LINESTRING(0 0,0 0,1 1,1 1,2 2)"),
+            Some("MULTIPOLYGON(((0.5 0.5,0 0,0 1,0.5 0.5)),((0.5 0.5,1 1,1 
0,0.5 0.5)),((2.5 2.5,2 2,2 3,2.5 2.5)),((2.5 2.5,3 3,3 2,2.5 2.5)))"),
+            Some("POINT (1 1)"),
+            Some("GEOMETRYCOLLECTION(POINT(1 1),MULTIPOLYGON(((0 2,1 1,0 0,0 
2)),((2 0,1 1,2 2,2 0))))"),
+            Some("POLYGON EMPTY"),
+            Some("POLYGON((0 0,3 0,3 3,2 1,1 3,0 3,0 0))"),
+        ], &WKB_GEOMETRY);
+
+        assert_eq!(&tester.invoke_wkb_array(input_wkt).unwrap(), &expected);
+    }
+
+    #[rstest]
+    fn st_make_valid_edge_cases(#[values(WKB_GEOMETRY)] sedona_type: 
SedonaType) {
+        let udf = SedonaScalarUDF::from_kernel("st_makevalid", 
st_make_valid_impl());
+        let tester = ScalarUdfTester::new(udf.into(), 
vec![sedona_type.clone()]);
+
+        // Test with very close points (floating point precision issues)
+        let result = tester
+            .invoke_scalar("POLYGON ((0 0, 0 1, 1 1, 1 0, 0.0000000001 
0.0000000001, 0 0))")
+            .unwrap();
+        tester.assert_scalar_result_equals(result, "POLYGON((0 0,0 1,1 1,1 
0,1e-10 1e-10,0 0))");
+
+        // Test with degenerate polygon (collinear points)
+        let result = tester
+            .invoke_scalar("POLYGON ((0 0, 1 1, 2 2, 3 3, 0 0))")
+            .unwrap();
+        tester
+            .assert_scalar_result_equals(result, "MULTILINESTRING((0 0,1 1),(1 
1,2 2),(2 2,3 3))");
+
+        // Test with polygon that has a tiny hole
+        let result = tester
+            .invoke_scalar("POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0), (5 5, 5 
5.0001, 5.0001 5.0001, 5.0001 5, 5 5))")
+            .unwrap();
+        tester.assert_scalar_result_equals(
+            result,
+            "POLYGON((0 0,0 10,10 10,10 0,0 0),(5 5,5 5.0001,5.0001 
5.0001,5.0001 5,5 5))",
+        );
+    }
+}
diff --git a/python/sedonadb/tests/functions/test_functions.py 
b/python/sedonadb/tests/functions/test_functions.py
index bb1494c2..8cb1847a 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -2023,6 +2023,75 @@ def test_st_mmax(eng, geom, expected):
     eng.assert_query_result(f"SELECT ST_MMax({geom_or_null(geom)})", expected)
 
 
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+    ("geom", "expected"),
+    [
+        # Already valid polygon should remain unchanged
+        (
+            "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))",
+            "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))",
+        ),
+        # Self-intersecting polygon (bowtie) should be fixed
+        (
+            "POLYGON ((0 0, 2 2, 2 0, 0 2, 0 0))",
+            "MULTIPOLYGON (((0 2, 1 1, 0 0, 0 2)), ((2 0, 1 1, 2 2, 2 0)))",
+        ),
+        # Polygon with incorrect ring orientation should be fixed
+        (
+            "POLYGON ((0 0, 0 3, 3 3, 3 0, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))",
+            "POLYGON ((0 0, 0 3, 3 3, 3 0, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))",
+        ),
+        # Polygon with repeated points should be cleaned (Note: JTS 'make 
valid' might preserve the duplicate points in WKT but the effective geometry is 
valid)
+        (
+            "POLYGON ((0 0, 0 1, 0 1, 1 1, 1 0, 0 0, 0 0))",
+            "POLYGON ((0 0, 0 1, 0 1, 1 1, 1 0, 0 0, 0 0))",
+        ),
+        # LineString that is already valid
+        (
+            "LINESTRING (0 0, 1 1, 2 2)",
+            "LINESTRING (0 0, 1 1, 2 2)",
+        ),
+        # LineString with repeated points should be simplified
+        (
+            "LINESTRING (0 0, 0 0, 1 1, 1 1, 2 2)",
+            "LINESTRING (0 0, 0 0, 1 1, 1 1, 2 2)",
+        ),
+        # MultiPolygon with invalid components (two bowtie polygons)
+        (
+            "MULTIPOLYGON (((0 0, 1 1, 1 0, 0 1, 0 0)), ((2 2, 3 3, 3 2, 2 3, 
2 2)))",
+            "MULTIPOLYGON (((0.5 0.5, 0 0, 0 1, 0.5 0.5)), ((0.5 0.5, 1 1, 1 
0, 0.5 0.5)), ((2.5 2.5, 2 2, 2 3, 2.5 2.5)), ((2.5 2.5, 3 3, 3 2, 2.5 2.5)))",
+        ),
+        # Point geometry (always valid)
+        (
+            "POINT (1 1)",
+            "POINT (1 1)",
+        ),
+        # GeometryCollection with mixed valid/invalid geometries
+        (
+            "GEOMETRYCOLLECTION (POINT (1 1), POLYGON ((0 0, 2 2, 2 0, 0 2, 0 
0)))",
+            "GEOMETRYCOLLECTION (POINT (1 1), MULTIPOLYGON (((0 2, 1 1, 0 0, 0 
2)), ((2 0, 1 1, 2 2, 2 0))))",
+        ),
+        # Empty geometry
+        (
+            "POLYGON EMPTY",
+            "POLYGON EMPTY",
+        ),
+        # Polygon with spike (almost self-intersecting)
+        (
+            "POLYGON ((0 0, 3 0, 3 3, 2 1, 1 3, 0 3, 0 0))",
+            "POLYGON ((0 0, 3 0, 3 3, 2 1, 1 3, 0 3, 0 0))",
+        ),
+    ],
+)
+def test_st_makevalid(eng, geom, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_MakeValid({geom_or_null(geom)})",
+        expected,
+    )
+
+
 @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 @pytest.mark.parametrize(
     ("geom", "expected"),

Reply via email to