This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new e8f4c5e3 feat(c/sedona-geos): Implement ST_MakeValid() (#312)
e8f4c5e3 is described below
commit e8f4c5e338c61a52dd2994c11943f3f0b546536d
Author: Abeeujah <[email protected]>
AuthorDate: Mon Nov 17 04:29:53 2025 +0100
feat(c/sedona-geos): Implement ST_MakeValid() (#312)
---
c/sedona-geos/src/lib.rs | 1 +
c/sedona-geos/src/register.rs | 2 +
c/sedona-geos/src/st_makevalid.rs | 161 ++++++++++++++++++++++
python/sedonadb/tests/functions/test_functions.py | 69 ++++++++++
4 files changed, 233 insertions(+)
diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs
index 94667123..29ba87d3 100644
--- a/c/sedona-geos/src/lib.rs
+++ b/c/sedona-geos/src/lib.rs
@@ -31,6 +31,7 @@ mod st_issimple;
mod st_isvalid;
mod st_isvalidreason;
mod st_length;
+mod st_makevalid;
mod st_perimeter;
mod st_polygonize_agg;
mod st_reverse;
diff --git a/c/sedona-geos/src/register.rs b/c/sedona-geos/src/register.rs
index e077c992..36c6cf6a 100644
--- a/c/sedona-geos/src/register.rs
+++ b/c/sedona-geos/src/register.rs
@@ -30,6 +30,7 @@ use crate::{
st_isvalid::st_is_valid_impl,
st_isvalidreason::st_is_valid_reason_impl,
st_length::st_length_impl,
+ st_makevalid::st_make_valid_impl,
st_perimeter::st_perimeter_impl,
st_polygonize_agg::st_polygonize_agg_impl,
st_reverse::st_reverse_impl,
@@ -72,6 +73,7 @@ pub fn scalar_kernels() -> Vec<(&'static str,
ScalarKernelRef)> {
("st_isvalid", st_is_valid_impl()),
("st_isvalidreason", st_is_valid_reason_impl()),
("st_length", st_length_impl()),
+ ("st_makevalid", st_make_valid_impl()),
("st_overlaps", st_overlaps_impl()),
("st_perimeter", st_perimeter_impl()),
("st_reverse", st_reverse_impl()),
diff --git a/c/sedona-geos/src/st_makevalid.rs
b/c/sedona-geos/src/st_makevalid.rs
new file mode 100644
index 00000000..74c63a2b
--- /dev/null
+++ b/c/sedona-geos/src/st_makevalid.rs
@@ -0,0 +1,161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::builder::BinaryBuilder;
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::ColumnarValue;
+use geos::Geom;
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
+use sedona_schema::{
+ datatypes::{SedonaType, WKB_GEOMETRY},
+ matchers::ArgMatcher,
+};
+
+use crate::executor::GeosExecutor;
+
+/// ST_MakeValid() implementation using the geos crate
+pub fn st_make_valid_impl() -> ScalarKernelRef {
+ Arc::new(STMakeValid {})
+}
+
+#[derive(Debug)]
+struct STMakeValid {}
+
+impl SedonaScalarKernel for STMakeValid {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()],
WKB_GEOMETRY);
+
+ matcher.match_args(args)
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ let executor = GeosExecutor::new(arg_types, args);
+ let mut builder = BinaryBuilder::with_capacity(
+ executor.num_iterations(),
+ WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+ );
+
+ executor.execute_wkb_void(|maybe_wkb| {
+ match maybe_wkb {
+ Some(wkb) => {
+ invoke_scalar(&wkb, &mut builder)?;
+ builder.append_value([]);
+ }
+ _ => builder.append_null(),
+ }
+
+ Ok(())
+ })?;
+
+ executor.finish(Arc::new(builder.finish()))
+ }
+}
+
+fn invoke_scalar(geos_geom: &geos::Geometry, writer: &mut impl std::io::Write)
-> Result<()> {
+ let geometry = geos_geom
+ .make_valid()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to make
geometry valid: {e}")))?;
+
+ let wkb = geometry
+ .to_wkb()
+ .map_err(|e| DataFusionError::Execution(format!("Failed to convert to
wkb: {e}")))?;
+
+ writer.write_all(wkb.as_ref())?;
+ Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+ use rstest::rstest;
+ use sedona_expr::scalar_udf::SedonaScalarUDF;
+ use sedona_schema::datatypes::WKB_VIEW_GEOMETRY;
+ use sedona_testing::{create::create_array, testers::ScalarUdfTester};
+
+ use super::*;
+
+ #[rstest]
+ fn st_make_valid_udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)]
sedona_type: SedonaType) {
+ let udf = SedonaScalarUDF::from_kernel("st_makevalid",
st_make_valid_impl());
+ let tester = ScalarUdfTester::new(udf.into(),
vec![sedona_type.clone()]);
+ tester.assert_return_type(WKB_GEOMETRY);
+
+ let input_wkt = vec![
+ Some("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"), // Already valid
polygon should remain unchanged
+ Some("POLYGON ((0 0, 2 2, 2 0, 0 2, 0 0))"), // Self-intersecting
polygon (bowtie) should be fixed
+ Some("POLYGON ((0 0, 0 3, 3 3, 3 0, 0 0), (1 1, 1 2, 2 2, 2 1, 1
1))"), // Polygon with incorrect ring orientation should be fixed
+ Some("POLYGON ((0 0, 0 1, 0 1, 1 1, 1 0, 0 0, 0 0))"), //Polygon
with repeated points should be cleaned
+ Some("LINESTRING (0 0, 1 1, 2 2)"), // LineString that is already
valid
+ Some("LINESTRING (0 0, 0 0, 1 1, 1 1, 2 2)"), // LineString with
repeated points should be simplified
+ Some("MULTIPOLYGON (((0 0, 1 1, 1 0, 0 1, 0 0)), ((2 2, 3 3, 3 2,
2 3, 2 2)))"), // MultiPolygon with invalid components
+ Some("POINT (1 1)"), // Point geometry (always valid)
+ Some("GEOMETRYCOLLECTION (POINT (1 1), POLYGON ((0 0, 2 2, 2 0, 0
2, 0 0)))"), // GeometryCollection with mixed valid/invalid geometries
+ Some("POLYGON EMPTY"), // Empty geometry
+ Some("POLYGON ((0 0, 3 0, 3 3, 2 1, 1 3, 0 3, 0 0))"), // Polygon
with spike (almost self-intersecting)
+ ];
+
+ let expected = create_array(&[
+ Some("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"),
+ Some("MULTIPOLYGON(((0 2,1 1,0 0,0 2)),((2 0,1 1,2 2,2 0)))"),
+ Some("POLYGON((0 0,0 3,3 3,3 0,0 0),(1 1,1 2,2 2,2 1,1 1))"),
+ Some("POLYGON((0 0,0 1,0 1,1 1,1 0,0 0,0 0))"),
+ Some("LINESTRING (0 0, 1 1, 2 2)"),
+ Some("LINESTRING(0 0,0 0,1 1,1 1,2 2)"),
+ Some("MULTIPOLYGON(((0.5 0.5,0 0,0 1,0.5 0.5)),((0.5 0.5,1 1,1
0,0.5 0.5)),((2.5 2.5,2 2,2 3,2.5 2.5)),((2.5 2.5,3 3,3 2,2.5 2.5)))"),
+ Some("POINT (1 1)"),
+ Some("GEOMETRYCOLLECTION(POINT(1 1),MULTIPOLYGON(((0 2,1 1,0 0,0
2)),((2 0,1 1,2 2,2 0))))"),
+ Some("POLYGON EMPTY"),
+ Some("POLYGON((0 0,3 0,3 3,2 1,1 3,0 3,0 0))"),
+ ], &WKB_GEOMETRY);
+
+ assert_eq!(&tester.invoke_wkb_array(input_wkt).unwrap(), &expected);
+ }
+
+ #[rstest]
+ fn st_make_valid_edge_cases(#[values(WKB_GEOMETRY)] sedona_type:
SedonaType) {
+ let udf = SedonaScalarUDF::from_kernel("st_makevalid",
st_make_valid_impl());
+ let tester = ScalarUdfTester::new(udf.into(),
vec![sedona_type.clone()]);
+
+ // Test with very close points (floating point precision issues)
+ let result = tester
+ .invoke_scalar("POLYGON ((0 0, 0 1, 1 1, 1 0, 0.0000000001
0.0000000001, 0 0))")
+ .unwrap();
+ tester.assert_scalar_result_equals(result, "POLYGON((0 0,0 1,1 1,1
0,1e-10 1e-10,0 0))");
+
+ // Test with degenerate polygon (collinear points)
+ let result = tester
+ .invoke_scalar("POLYGON ((0 0, 1 1, 2 2, 3 3, 0 0))")
+ .unwrap();
+ tester
+ .assert_scalar_result_equals(result, "MULTILINESTRING((0 0,1 1),(1
1,2 2),(2 2,3 3))");
+
+ // Test with polygon that has a tiny hole
+ let result = tester
+ .invoke_scalar("POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0), (5 5, 5
5.0001, 5.0001 5.0001, 5.0001 5, 5 5))")
+ .unwrap();
+ tester.assert_scalar_result_equals(
+ result,
+ "POLYGON((0 0,0 10,10 10,10 0,0 0),(5 5,5 5.0001,5.0001
5.0001,5.0001 5,5 5))",
+ );
+ }
+}
diff --git a/python/sedonadb/tests/functions/test_functions.py
b/python/sedonadb/tests/functions/test_functions.py
index bb1494c2..8cb1847a 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -2023,6 +2023,75 @@ def test_st_mmax(eng, geom, expected):
eng.assert_query_result(f"SELECT ST_MMax({geom_or_null(geom)})", expected)
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+ ("geom", "expected"),
+ [
+ # Already valid polygon should remain unchanged
+ (
+ "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))",
+ "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))",
+ ),
+ # Self-intersecting polygon (bowtie) should be fixed
+ (
+ "POLYGON ((0 0, 2 2, 2 0, 0 2, 0 0))",
+ "MULTIPOLYGON (((0 2, 1 1, 0 0, 0 2)), ((2 0, 1 1, 2 2, 2 0)))",
+ ),
+ # Polygon with incorrect ring orientation should be fixed
+ (
+ "POLYGON ((0 0, 0 3, 3 3, 3 0, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))",
+ "POLYGON ((0 0, 0 3, 3 3, 3 0, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))",
+ ),
+ # Polygon with repeated points should be cleaned (Note: JTS 'make
valid' might preserve the duplicate points in WKT but the effective geometry is
valid)
+ (
+ "POLYGON ((0 0, 0 1, 0 1, 1 1, 1 0, 0 0, 0 0))",
+ "POLYGON ((0 0, 0 1, 0 1, 1 1, 1 0, 0 0, 0 0))",
+ ),
+ # LineString that is already valid
+ (
+ "LINESTRING (0 0, 1 1, 2 2)",
+ "LINESTRING (0 0, 1 1, 2 2)",
+ ),
+ # LineString with repeated points should be simplified
+ (
+ "LINESTRING (0 0, 0 0, 1 1, 1 1, 2 2)",
+ "LINESTRING (0 0, 0 0, 1 1, 1 1, 2 2)",
+ ),
+ # MultiPolygon with invalid components (two bowtie polygons)
+ (
+ "MULTIPOLYGON (((0 0, 1 1, 1 0, 0 1, 0 0)), ((2 2, 3 3, 3 2, 2 3,
2 2)))",
+ "MULTIPOLYGON (((0.5 0.5, 0 0, 0 1, 0.5 0.5)), ((0.5 0.5, 1 1, 1
0, 0.5 0.5)), ((2.5 2.5, 2 2, 2 3, 2.5 2.5)), ((2.5 2.5, 3 3, 3 2, 2.5 2.5)))",
+ ),
+ # Point geometry (always valid)
+ (
+ "POINT (1 1)",
+ "POINT (1 1)",
+ ),
+ # GeometryCollection with mixed valid/invalid geometries
+ (
+ "GEOMETRYCOLLECTION (POINT (1 1), POLYGON ((0 0, 2 2, 2 0, 0 2, 0
0)))",
+ "GEOMETRYCOLLECTION (POINT (1 1), MULTIPOLYGON (((0 2, 1 1, 0 0, 0
2)), ((2 0, 1 1, 2 2, 2 0))))",
+ ),
+ # Empty geometry
+ (
+ "POLYGON EMPTY",
+ "POLYGON EMPTY",
+ ),
+ # Polygon with spike (almost self-intersecting)
+ (
+ "POLYGON ((0 0, 3 0, 3 3, 2 1, 1 3, 0 3, 0 0))",
+ "POLYGON ((0 0, 3 0, 3 3, 2 1, 1 3, 0 3, 0 0))",
+ ),
+ ],
+)
+def test_st_makevalid(eng, geom, expected):
+ eng = eng.create_or_skip()
+ eng.assert_query_result(
+ f"SELECT ST_MakeValid({geom_or_null(geom)})",
+ expected,
+ )
+
+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "expected"),