This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new b65a941c feat(rust/sedona-geo): ST_ConcaveHull Geo Implementation 
(#462)
b65a941c is described below

commit b65a941cbdea312458b676d2b60c91109c5c913f
Author: Abeeujah <[email protected]>
AuthorDate: Fri Dec 19 05:08:06 2025 +0100

    feat(rust/sedona-geo): ST_ConcaveHull Geo Implementation (#462)
---
 rust/sedona-geo/src/lib.rs            |   1 +
 rust/sedona-geo/src/st_concavehull.rs | 462 ++++++++++++++++++++++++++++++++++
 2 files changed, 463 insertions(+)

diff --git a/rust/sedona-geo/src/lib.rs b/rust/sedona-geo/src/lib.rs
index 48a9430b..52182beb 100644
--- a/rust/sedona-geo/src/lib.rs
+++ b/rust/sedona-geo/src/lib.rs
@@ -19,6 +19,7 @@ pub mod register;
 mod st_area;
 mod st_buffer;
 mod st_centroid;
+pub mod st_concavehull;
 mod st_distance;
 mod st_dwithin;
 mod st_intersection_agg;
diff --git a/rust/sedona-geo/src/st_concavehull.rs 
b/rust/sedona-geo/src/st_concavehull.rs
new file mode 100644
index 00000000..7349d0b4
--- /dev/null
+++ b/rust/sedona-geo/src/st_concavehull.rs
@@ -0,0 +1,462 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::builder::BinaryBuilder;
+use datafusion_common::error::Result;
+use datafusion_common::{cast::as_float64_array, DataFusionError};
+use datafusion_expr::ColumnarValue;
+use geo::{ConcaveHull, CoordsIter, Geometry, GeometryCollection, Point, 
Polygon};
+use geo_traits::to_geo::{ToGeoGeometry, ToGeoPoint};
+use geo_traits::{GeometryCollectionTrait, GeometryTrait, MultiPointTrait, 
PointTrait};
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_functions::executor::WkbExecutor;
+use sedona_geometry::is_empty;
+use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
+use sedona_schema::datatypes::SedonaType;
+use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher};
+use wkb::reader::Wkb;
+use wkb::writer::{write_geometry, WriteOptions};
+
+use crate::to_geo::item_to_geometry;
+
+/// ST_ConcaveHull implementation using [ConcaveHull]
+///
+/// Geo returns a Polygon for every concave hull computation
+/// whereas the Geos implementation returns a MultiPolygon for
+/// certain geometries concave hull computation.
+pub fn st_concavehull_impl() -> ScalarKernelRef {
+    Arc::new(STConcaveHull {})
+}
+
+#[derive(Debug)]
+struct STConcaveHull {}
+
+impl SedonaScalarKernel for STConcaveHull {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_geometry(), ArgMatcher::is_numeric()],
+            WKB_GEOMETRY,
+        );
+
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        invoke_batch_impl(arg_types, args)
+    }
+}
+
+fn invoke_batch_impl(arg_types: &[SedonaType], args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
+    let executor = WkbExecutor::new(arg_types, args);
+    let mut builder = BinaryBuilder::with_capacity(
+        executor.num_iterations(),
+        WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+    );
+
+    // Extract Args
+    let pct_convex_val = args[1]
+        .cast_to(&arrow_schema::DataType::Float64, None)?
+        .to_array(executor.num_iterations())?;
+    let pct_convex_array = as_float64_array(&pct_convex_val)?;
+    let mut pct_convex_iter = pct_convex_array.iter();
+
+    executor.execute_wkb_void(|maybe_wkb| {
+        match (maybe_wkb, pct_convex_iter.next().unwrap()) {
+            (Some(wkb), Some(pct_convex)) => {
+                invoke_scalar(&wkb, pct_convex, &mut builder)?;
+                builder.append_value([]);
+            }
+            _ => builder.append_null(),
+        }
+
+        Ok(())
+    })?;
+
+    executor.finish(Arc::new(builder.finish()))
+}
+
+fn invoke_scalar(geom: &Wkb, pct_convex: f64, writer: &mut impl 
std::io::Write) -> Result<()> {
+    if is_empty::is_geometry_empty(geom).map_err(|e| 
DataFusionError::Execution(e.to_string()))? {
+        write_geometry(writer, &Polygon::<f64>::empty(), &write_opts())
+            .map_err(|e| DataFusionError::Execution(e.to_string()))?;
+        return Ok(());
+    }
+    compute_and_write_hull(&normalize_geometry(geom)?, pct_convex, writer)
+}
+
+fn write_opts() -> WriteOptions {
+    WriteOptions {
+        endianness: wkb::Endianness::LittleEndian,
+    }
+}
+
+fn normalize_geometry(geom: &Wkb) -> Result<Geometry> {
+    match geom.as_type() {
+        geo_traits::GeometryType::GeometryCollection(gc) => {
+            let filtered: Vec<Geometry> = gc
+                .geometries()
+                .filter(|g| !is_empty::is_geometry_empty(g).unwrap_or(true))
+                .map(|g| g.to_geometry())
+                .collect();
+            Ok(Geometry::GeometryCollection(GeometryCollection::new_from(
+                filtered,
+            )))
+        }
+
+        geo_traits::GeometryType::MultiPoint(mp) => {
+            let filtered: Vec<geo_types::Point> = mp
+                .points()
+                .filter_map(|pt| pt.coord().map(|_| pt.to_point()))
+                .collect();
+            Ok(Geometry::MultiPoint(geo_types::MultiPoint::new(filtered)))
+        }
+
+        geo_traits::GeometryType::LineString(ls) => item_to_geometry(ls),
+        geo_traits::GeometryType::MultiLineString(mls) => 
item_to_geometry(mls),
+        geo_traits::GeometryType::Polygon(pgn) => item_to_geometry(pgn),
+        geo_traits::GeometryType::MultiPolygon(mpgn) => item_to_geometry(mpgn),
+        geo_traits::GeometryType::Point(pt) => item_to_geometry(pt),
+
+        _ => Err(DataFusionError::Execution(
+            "Unsupported geometry type".to_string(),
+        )),
+    }
+}
+
+fn compute_and_write_hull(
+    geom: &Geometry,
+    pct_convex: f64,
+    writer: &mut impl std::io::Write,
+) -> Result<()> {
+    match geom.as_type() {
+        geo_traits::GeometryType::Point(pt) => 
wkb::writer::write_point(writer, pt, &write_opts())
+            .map_err(|e| DataFusionError::Execution(e.to_string()))?,
+
+        geo_traits::GeometryType::MultiPoint(mpt) => {
+            let hull = geo_types::MultiPoint::new(
+                mpt.points()
+                    .filter(|pt| pt.coord().is_some())
+                    .copied()
+                    .collect::<Vec<Point>>(),
+            )
+            .concave_hull(pct_convex);
+            write_concave_hull(writer, hull)?;
+        }
+
+        geo_traits::GeometryType::LineString(ls) => {
+            let hull = ls.concave_hull(pct_convex);
+            write_concave_hull(writer, hull)?;
+        }
+
+        geo_traits::GeometryType::Polygon(pgn) => {
+            let hull = pgn.concave_hull(pct_convex);
+            write_concave_hull(writer, hull)?;
+        }
+
+        geo_traits::GeometryType::MultiLineString(mls) => {
+            let hull = mls.concave_hull(pct_convex);
+            write_concave_hull(writer, hull)?;
+        }
+
+        geo_traits::GeometryType::MultiPolygon(mpgn) => {
+            let hull = mpgn.concave_hull(pct_convex);
+            write_concave_hull(writer, hull)?;
+        }
+
+        geo_traits::GeometryType::GeometryCollection(gcn) => {
+            let coords: Vec<geo_types::Coord> = gcn
+                .geometries()
+                .flat_map(|geom| geom.coords_iter())
+                .collect();
+
+            let multi_point = geo_types::MultiPoint::new(
+                coords.into_iter().map(geo_types::Point::from).collect(),
+            );
+
+            let hull = multi_point.concave_hull(pct_convex);
+            write_concave_hull(writer, hull)?;
+        }
+
+        _ => {
+            return Err(DataFusionError::Execution(
+                "Unsupported geometry type for concave hull".to_string(),
+            ))
+        }
+    }
+
+    Ok(())
+}
+
+fn write_concave_hull<W: std::io::Write>(
+    writer: &mut W,
+    hull: impl GeometryTrait<T = f64>,
+) -> Result<()> {
+    wkb::writer::write_geometry(writer, &hull, &write_opts())
+        .map_err(|e| DataFusionError::Execution(e.to_string()))?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::Float64Array;
+    use arrow_schema::DataType;
+    use datafusion_common::ScalarValue;
+    use rstest::rstest;
+    use sedona_expr::scalar_udf::SedonaScalarUDF;
+    use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
+    use sedona_testing::{
+        compare::{assert_array_equal, 
assert_scalar_equal_wkb_geometry_topologically},
+        create::create_array,
+        testers::ScalarUdfTester,
+    };
+
+    /// Helper to initialize the UDF tester to avoid boilerplate in every test.
+    fn create_tester(sedona_type: SedonaType) -> ScalarUdfTester {
+        let udf = SedonaScalarUDF::from_kernel("st_concavehull", 
st_concavehull_impl());
+        ScalarUdfTester::new(
+            udf.into(),
+            vec![sedona_type, SedonaType::Arrow(DataType::Float64)],
+        )
+    }
+
+    #[rstest]
+    fn test_return_type(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] 
sedona_type: SedonaType) {
+        let tester = create_tester(sedona_type);
+        assert_eq!(tester.return_type().unwrap(), WKB_GEOMETRY);
+    }
+
+    #[rstest]
+    fn test_null_inputs(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] 
sedona_type: SedonaType) {
+        let tester = create_tester(sedona_type);
+        let result = tester
+            .invoke_scalar_scalar(ScalarValue::Null, ScalarValue::Null)
+            .unwrap();
+        assert!(result.is_null());
+    }
+
+    #[rstest]
+    fn test_empty_geometries(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] 
sedona_type: SedonaType) {
+        let udf = SedonaScalarUDF::from_kernel("st_concavehull", 
st_concavehull_impl());
+        let tester = ScalarUdfTester::new(
+            udf.into(),
+            vec![sedona_type.clone(), SedonaType::Arrow(DataType::Float64)],
+        );
+
+        let input_wkt_array = vec![
+            Some("POINT EMPTY"),
+            Some("LINESTRING EMPTY"),
+            Some("POLYGON EMPTY"),
+            Some("MULTIPOINT EMPTY"),
+            Some("MULTILINESTRING EMPTY"),
+            Some("MULTIPOLYGON EMPTY"),
+            Some("GEOMETRYCOLLECTION EMPTY"),
+        ];
+
+        let tolerance_array: Arc<Float64Array> = 
Arc::new(Float64Array::from(vec![
+            Some(0.1),
+            Some(0.1),
+            Some(0.1),
+            Some(0.1),
+            Some(0.1),
+            Some(0.1),
+            Some(0.1),
+        ]));
+
+        let expected_array = create_array(
+            &[
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+                Some("POLYGON EMPTY"),
+            ],
+            &WKB_GEOMETRY,
+        );
+
+        assert_array_equal(
+            &tester
+                .invoke_arrays(vec![
+                    create_array(&input_wkt_array, &sedona_type),
+                    tolerance_array,
+                ])
+                .unwrap(),
+            &expected_array,
+        );
+    }
+
+    /// For testing here, the Python method was adopted as the 
`assert_array_equal`
+    /// doesn't assert `topologically`
+    #[rstest]
+    fn test_different_geometry_types_fixed_parameter(
+        #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType,
+    ) {
+        let tester = create_tester(sedona_type);
+
+        // 
=========================================================================
+        // Differing Geometry Types, Identical Percentage (0.1)
+        // 
=========================================================================
+        let test_cases = vec![
+            ("POINT (2.5 3.1)", "POINT (2.5 3.1)"),
+            (
+                "LINESTRING (50 50, 150 150, 150 50)",
+                "POLYGON ((50 50, 150 150, 150 50, 50 50))",
+            ),
+            (
+                "MULTIPOINT ((0 0), (10 0), (0 10), (10 10), (5 5))",
+                "POLYGON ((10 0, 10 10, 0 10, 0 0, 5 5, 10 0))",
+            ),
+            (
+                "MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 
30 10))",
+                "POLYGON ((20 20, 10 40, 30 30, 40 40, 40 20, 30 10, 10 10, 20 
20))",
+            ),
+            (
+                "MULTIPOLYGON (((2 2, 2 5, 5 5, 5 2, 2 2)), ((6 3, 8 3, 8 1, 6 
1, 6 3)))",
+                "POLYGON ((5 2, 2 2, 2 5, 5 5, 6 3, 8 3, 8 1, 6 1, 5 2))",
+            ),
+            (
+                "MULTIPOLYGON (\
+                    ((26 125, 26 200, 126 200, 126 125, 26 125 ),\
+                    ( 51 150, 101 150, 76 175, 51 150 )),\
+                    (( 151 100, 151 200, 176 175, 151 100 ))\
+                )",
+                "POLYGON ((151 100, 176 175, 151 200, 126 200, 26 200, 26 125, 
126 125, 151 100))",
+            ),
+            (
+                "GEOMETRYCOLLECTION (MULTIPOINT((1 1), (3 3)), POINT(5 6), 
LINESTRING(4 5, 5 6))",
+                "POLYGON ((3 3, 1 1, 4 5, 5 6, 3 3))",
+            ),
+            (
+                "GEOMETRYCOLLECTION (MULTIPOINT((1 1), (3 3)), POINT EMPTY, 
LINESTRING(4 5, 5 6))",
+                "POLYGON ((3 3, 1 1, 4 5, 5 6, 3 3))",
+            ),
+            (
+                "GEOMETRYCOLLECTION ( LINESTRING(1 1,2 2),\
+                    GEOMETRYCOLLECTION( POLYGON((3 3,4 4,5 5,3 3)),\
+                        GEOMETRYCOLLECTION (LINESTRING(6 6,7 7), POLYGON((8 
8,9 9,10 10,8 8)))\
+                    )\
+                )",
+                "POLYGON ((10 10, 1 1, 3 3, 3 3, 4 4, 5 5, 8 8, 9 9, 10 10))",
+            ),
+        ];
+
+        for (input_wkt, expected_wkt) in test_cases {
+            let result = tester.invoke_scalar_scalar(input_wkt, 0.1).unwrap();
+            assert_scalar_equal_wkb_geometry_topologically(&result, 
Some(expected_wkt));
+        }
+    }
+
+    /// For testing here, the Python method was adopted as the 
`assert_array_equal`
+    /// doesn't assert `topologically`
+    #[rstest]
+    fn test_different_parameters_fixed_geometry(
+        #[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType,
+    ) {
+        let tester = create_tester(sedona_type);
+
+        // 
=========================================================================
+        // Identical Geometry Types, Differing Percentages
+        // 
=========================================================================
+        let test_cases = vec![
+            // LINESTRING with different pctconvex values
+            (
+                "LINESTRING (100 150, 50 60, 70 80, 160 170)",
+                0.1,
+                "POLYGON ((70 80, 50 60, 100 150, 160 170, 70 80))",
+            ),
+            (
+                "LINESTRING (100 150, 50 60, 70 80, 160 170)",
+                0.3,
+                "POLYGON ((70 80, 50 60, 100 150, 160 170, 70 80))",
+            ),
+            // Test POLYGON with different pctconvex values
+            (
+                "POLYGON ((70 80, 50 60, 100 150, 160 170, 70 80))",
+                0.1,
+                "POLYGON ((70 80, 50 60, 100 150, 160 170, 70 80))",
+            ),
+            (
+                "POLYGON ((70 80, 50 60, 100 150, 160 170, 70 80))",
+                0.2,
+                "POLYGON ((70 80, 50 60, 100 150, 160 170, 70 80))",
+            ),
+            // Test MULTIPOINT with different pctconvex values
+            (
+                "MULTIPOINT ((100 150), (160 170))",
+                0.1,
+                "POLYGON ((100 150, 160 170, 100 150))",
+            ),
+            (
+                "MULTIPOINT ((100 150), (160 170))",
+                0.2,
+                "POLYGON ((100 150, 160 170, 100 150))",
+            ),
+            // Test MULTILINESTRING with different pctconvex values
+            (
+                "MULTILINESTRING ((50 150, 50 200), (50 50, 50 100))",
+                0.1,
+                "POLYGON ((50 200, 50 50, 50 100, 50 150, 50 200))",
+            ),
+            (
+                "MULTILINESTRING ((50 150, 50 200), (50 50, 50 100))",
+                0.2,
+                "POLYGON ((50 200, 50 50, 50 100, 50 150, 50 200))",
+            ),
+            // Test MULTIPOLYGON with different pctconvex values
+            (
+                "MULTIPOLYGON (((26 125, 26 200, 126 200, 126 125, 26 125 ),\
+                    ( 51 150, 101 150, 76 175, 51 150 )), (( 151 100, 151 200, 
176 175, 151 100 )))",
+                0.1,
+                "POLYGON ((151 100, 176 175, 151 200, 126 200, 26 200, 26 125, 
126 125, 151 100))"
+            ),
+            (
+                "MULTIPOLYGON (((26 125, 26 200, 126 200, 126 125, 26 125 ),\
+                    ( 51 150, 101 150, 76 175, 51 150 )), (( 151 100, 151 200, 
176 175, 151 100 )))",
+                0.4,
+                "POLYGON((151 100,176 175,151 200,26 200,26 125,151 100))"
+            ),
+            // Test GEOMETRYCOLLECTION with different pctconvex values
+            (
+                "GEOMETRYCOLLECTION(LINESTRING(1 1,2 2), \
+                    GEOMETRYCOLLECTION(POLYGON((3 3,4 4,5 5,3 3)), \
+                    GEOMETRYCOLLECTION(LINESTRING(6 6,7 7), POLYGON((8 8,9 
9,10 10,8 8)))))",
+                0.1,
+                "POLYGON ((10 10, 1 1, 3 3, 3 3, 4 4, 5 5, 8 8, 9 9, 10 10))"
+            ),
+            (
+                "GEOMETRYCOLLECTION(LINESTRING(1 1,2 2), \
+                    GEOMETRYCOLLECTION(POLYGON((3 3,4 4,5 5,3 3)), \
+                    GEOMETRYCOLLECTION(LINESTRING(6 6,7 7), POLYGON((8 8,9 
9,10 10,8 8)))))",
+                0.6,
+                "POLYGON ((10 10, 1 1, 3 3, 3 3, 4 4, 5 5, 8 8, 9 9, 10 10))"
+            ),
+        ];
+
+        for (wkt, pctconvex, expected) in test_cases {
+            let result = tester.invoke_scalar_scalar(wkt, pctconvex).unwrap();
+            assert_scalar_equal_wkb_geometry_topologically(&result, 
Some(expected));
+        }
+    }
+}

Reply via email to