paleolimbot commented on code in PR #298:
URL: https://github.com/apache/sedona-db/pull/298#discussion_r2525439051


##########
python/sedonadb/tests/functions/test_functions.py:
##########
@@ -142,6 +142,96 @@ def test_st_azimuth(eng, geom1, geom2, expected):
     )
 
 
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+    ("geom", "expected_boundary"),
+    [
+        (None, None),
+        ("LINESTRING(1 1, 0 0, -1 1)", "MULTIPOINT (1 1, -1 1)"),
+        ("POLYGON((1 1,0 0, -1 1, 1 1))", "LINESTRING (1 1, 0 0, -1 1, 1 1)"),
+        (
+            "LINESTRING(100 150,50 60, 70 80, 160 170)",
+            "MULTIPOINT (100 150, 160 170)",
+        ),
+        (
+            "POLYGON (( 10 130, 50 190, 110 190, 140 150, 150 80, 100 10, 20 
40, 10 130 ), ( 70 40, 100 50, 120 80, 80 110, 50 90, 70 40 ))",
+            "MULTILINESTRING ((10 130, 50 190, 110 190, 140 150, 150 80, 100 
10, 20 40, 10 130), (70 40, 100 50, 120 80, 80 110, 50 90, 70 40))",
+        ),
+        (
+            "MULTILINESTRING ((1 1, 2 2), (3 3, 4 4))",
+            "MULTIPOINT (1 1, 2 2, 3 3, 4 4)",
+        ),
+        (
+            "MULTILINESTRING ((10 10, 20 20), (30 30, 40 40, 30 30))",
+            "MULTIPOINT (10 10, 20 20)",
+        ),
+        ("POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))", "LINESTRING (0 0, 0 1, 1 1, 1 
0, 0 0)"),
+        (
+            "MULTIPOLYGON (((0 0, 0 1, 1 1, 1 0, 0 0)), ((10 10, 10 11, 11 11, 
11 10, 10 10)))",
+            "MULTILINESTRING ((0 0, 0 1, 1 1, 1 0, 0 0), (10 10, 10 11, 11 11, 
11 10, 10 10))",
+        ),
+        (
+            "MULTIPOLYGON (((0 0, 0 10, 10 10, 10 0, 0 0), (2 2, 2 8, 8 8, 8 
2, 2 2)))",
+            "MULTILINESTRING ((0 0, 0 10, 10 10, 10 0, 0 0), (2 2, 2 8, 8 8, 8 
2, 2 2))",  # Note: Order of points in inner ring may vary by implementation, 
but boundary is correct.
+        ),
+        (
+            "GEOMETRYCOLLECTION(POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)), 
GEOMETRYCOLLECTION(LINESTRING(10 10, 10 20)))",
+            "GEOMETRYCOLLECTION (MULTIPOINT (10 10, 10 20), LINESTRING (0 0, 0 
1, 1 1, 1 0, 0 0))",
+        ),
+        (
+            "GEOMETRYCOLLECTION(LINESTRING(1 1,2 
2),GEOMETRYCOLLECTION(POLYGON((3 3,4 4,5 5,3 
3)),GEOMETRYCOLLECTION(LINESTRING(6 6,7 7),POLYGON((8 8,9 9,10 10,8 8)))))",
+            "GEOMETRYCOLLECTION (MULTIPOINT (1 1, 2 2, 6 6, 7 7), 
MULTILINESTRING ((3 3, 4 4, 5 5, 3 3), (8 8, 9 9, 10 10, 8 8)))",
+        ),
+        (
+            "GEOMETRYCOLLECTION(LINESTRING(10 10,20 
20),GEOMETRYCOLLECTION(POLYGON((30 30,40 40,50 50,30 
30)),GEOMETRYCOLLECTION(LINESTRING(60 60,70 70),LINESTRING(80 80,90 90))))",
+            "GEOMETRYCOLLECTION (MULTIPOINT (10 10, 20 20, 60 60, 70 70, 80 
80, 90 90), LINESTRING (30 30, 40 40, 50 50, 30 30))",
+        ),
+        (
+            "GEOMETRYCOLLECTION(POLYGON((1 1,2 2,3 3,1 
1)),GEOMETRYCOLLECTION(LINESTRING(4 4,5 5),GEOMETRYCOLLECTION(POLYGON((6 6,7 
7,8 8,6 6)),LINESTRING(9 9,10 10))))",
+            "GEOMETRYCOLLECTION (MULTIPOINT (4 4, 5 5, 9 9, 10 10), 
MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (6 6, 7 7, 8 8, 6 6)))",
+        ),
+        (
+            "GEOMETRYCOLLECTION(LINESTRING(1 1,1 10,10 10,10 1,1 
1),GEOMETRYCOLLECTION(LINESTRING(2 2,2 9,9 9,9 2,2 
2),GEOMETRYCOLLECTION(POLYGON((3 3,3 8,8 8,8 3,3 3)),POLYGON((4 4,4 7,7 7,7 4,4 
4)))))",
+            "MULTILINESTRING ((3 3, 3 8, 8 8, 8 3, 3 3), (4 4, 4 7, 7 7, 7 4, 
4 4))",
+        ),
+        (
+            "GEOMETRYCOLLECTION(POLYGON((0 0,10 0,10 10,0 10,0 
0)),GEOMETRYCOLLECTION(LINESTRING(1 1,9 9),GEOMETRYCOLLECTION(LINESTRING(1 9,9 
1),POLYGON((2 2,8 2,8 8,2 8,2 2)))))",
+            "GEOMETRYCOLLECTION (MULTIPOINT (1 1, 9 9, 1 9, 9 1), 
MULTILINESTRING ((0 0, 10 0, 10 10, 0 10, 0 0), (2 2, 8 2, 8 8, 2 8, 2 2)))",
+        ),
+        (
+            
"GEOMETRYCOLLECTION(GEOMETRYCOLLECTION(GEOMETRYCOLLECTION(LINESTRING(1 2,3 
4),POLYGON((5 6,7 8,9 10,5 6))),POLYGON((11 12,13 14,15 16,11 
12))),LINESTRING(17 18,19 20))",
+            "GEOMETRYCOLLECTION (MULTIPOINT (1 2, 3 4, 17 18, 19 20), 
MULTILINESTRING ((5 6, 7 8, 9 10, 5 6), (11 12, 13 14, 15 16, 11 12)))",
+        ),
+    ],
+)
+def test_st_boundary(eng, geom, expected_boundary):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        f"SELECT ST_Boundary({geom_or_null(geom)})", expected_boundary
+    )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
[email protected](
+    ("geom", "is_empty"),
+    [
+        ("POINT (5 10)", True),
+        ("POINT (0 0)", True),
+        ("POINT (-1 -5)", True),
+        ("MULTIPOINT (100 200)", True),
+        ("MULTIPOINT (5 10, 15 20)", True),
+        ("MULTIPOINT (1 1, 2 2, 3 3, 1 1)", True),
+        ("LINESTRING(10 10, 20 20, 30 10, 10 10)", True),
+        ("MULTILINESTRING ((0 0, 0 1, 1 0, 0 0), (10 10, 10 20, 20 10, 10 
10))", True),
+    ],
+)
+def test_st_boundary_empty(eng, geom, is_empty):

Review Comment:
   This is a great way to avoid the POINT EMPTY vs POINT (nan nan) issue we've 
been having as a result of the geoarrow-c output.



##########
c/sedona-geos/src/st_boundary.rs:
##########
@@ -0,0 +1,344 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::builder::BinaryBuilder;
+use datafusion_common::{error::Result, DataFusionError};
+use datafusion_expr::ColumnarValue;
+use geos::{Geom, Geometry, GeometryTypes};
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
+use sedona_schema::{
+    datatypes::{SedonaType, WKB_GEOMETRY},
+    matchers::ArgMatcher,
+};
+
+use crate::executor::GeosExecutor;
+
+/// ST_Boundary() implementation using the geos crate
+pub fn st_boundary_impl() -> ScalarKernelRef {
+    Arc::new(STBoundary {})
+}
+
+#[derive(Debug)]
+struct STBoundary {}
+
+impl SedonaScalarKernel for STBoundary {
+    fn return_type(&self, args: &[SedonaType]) -> 
datafusion_common::Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], 
WKB_GEOMETRY);
+
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> datafusion_common::Result<ColumnarValue> {
+        let executor = GeosExecutor::new(arg_types, args);
+        let mut builder = BinaryBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+        );
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            match maybe_wkb {
+                Some(wkb) => {
+                    invoke_scalar(&wkb, &mut builder)?;
+                    builder.append_value([]);
+                }
+                _ => builder.append_null(),
+            }
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+fn invoke_scalar(geos_geom: &geos::Geometry, writer: &mut BinaryBuilder) -> 
Result<()> {
+    let result_geom = geos_boundary(geos_geom)?;
+
+    let wkb = result_geom
+        .to_wkb()
+        .map_err(|e| DataFusionError::Execution(format!("Failed to convert to 
wkb: {e}")))?;
+
+    writer.append_value(wkb.as_ref());
+    Ok(())
+}
+
+/// For simple geometries, it calls `geometry.boundary()`.
+/// For a `GeometryCollection`, it recursively computes the boundary for each
+/// component and then re-aggregates the resulting geometry types (Point, 
LineString, etc.)
+/// into their respective Multi-geometry types (MultiPoint, MultiLineString, 
etc.)
+/// before forming the final `GeometryCollection`. This aggregation step is 
crucial
+/// for adhering to OGC specifications for `GeometryCollection` boundaries.
+fn geos_boundary(geometry: &impl Geom) -> Result<Geometry> {

Review Comment:
   Thank you for handling this case!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to