petern48 commented on code in PR #286:
URL: https://github.com/apache/sedona-db/pull/286#discussion_r2507015053


##########
c/sedona-geos/src/st_polygonize.rs:
##########
@@ -0,0 +1,311 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow_array::{
+    builder::OffsetBufferBuilder, cast::as_list_array, Array, ArrayRef, 
BinaryArray,
+};
+use arrow_schema::{DataType, Field, FieldRef};
+use datafusion_common::{cast::as_binary_array, error::Result, DataFusionError, 
ScalarValue};
+use datafusion_expr::{Accumulator, ColumnarValue};
+use geos::Geom;
+use sedona_expr::aggregate_udf::{SedonaAccumulator, SedonaAccumulatorRef};
+use sedona_schema::{
+    datatypes::{SedonaType, WKB_GEOMETRY},
+    matchers::ArgMatcher,
+};
+
+/// ST_Polygonize() aggregate implementation using GEOS
+pub fn st_polygonize_impl() -> SedonaAccumulatorRef {
+    Arc::new(STPolygonize {})
+}
+
+#[derive(Debug)]
+struct STPolygonize {}
+
+impl SedonaAccumulator for STPolygonize {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], 
WKB_GEOMETRY);
+        matcher.match_args(args)
+    }
+
+    fn accumulator(
+        &self,
+        args: &[SedonaType],
+        _output_type: &SedonaType,
+    ) -> Result<Box<dyn Accumulator>> {
+        Ok(Box::new(PolygonizeAccumulator::new(args[0].clone())))
+    }
+
+    fn state_fields(&self, _args: &[SedonaType]) -> Result<Vec<FieldRef>> {
+        Ok(vec![Arc::new(Field::new(
+            "geometries",
+            DataType::List(Arc::new(Field::new("item", DataType::Binary, 
true))),
+            false,
+        ))])
+    }
+}
+
+#[derive(Debug)]
+struct PolygonizeAccumulator {
+    input_type: SedonaType,
+    geometries: Vec<Vec<u8>>,
+}
+
+impl PolygonizeAccumulator {
+    pub fn new(input_type: SedonaType) -> Self {
+        Self {
+            input_type,
+            geometries: Vec::new(),
+        }
+    }
+
+    fn make_wkb_result(&self) -> Result<Option<Vec<u8>>> {
+        if self.geometries.is_empty() {
+            return Ok(None);
+        }
+
+        let mut geos_geoms = Vec::with_capacity(self.geometries.len());
+        for wkb in &self.geometries {
+            let geom = geos::Geometry::new_from_wkb(wkb).map_err(|e| {
+                DataFusionError::Execution(format!("Failed to convert WKB to 
GEOS: {e}"))
+            })?;
+            geos_geoms.push(geom);
+        }
+
+        let result = geos::Geometry::polygonize(&geos_geoms)
+            .map_err(|e| DataFusionError::Execution(format!("Failed to 
polygonize: {e}")))?;
+
+        let wkb = result.to_wkb().map_err(|e| {
+            DataFusionError::Execution(format!("Failed to convert result to 
WKB: {e}"))
+        })?;
+
+        Ok(Some(wkb.into()))
+    }
+}
+
+impl Accumulator for PolygonizeAccumulator {
+    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
+        if values.is_empty() {
+            return Err(DataFusionError::Internal(
+                "No input arrays provided to accumulator in 
update_batch".to_string(),
+            ));
+        }
+
+        let arg_types = [self.input_type.clone()];
+        let args = [ColumnarValue::Array(values[0].clone())];
+        let executor = 
sedona_functions::executor::WkbExecutor::new(&arg_types, &args);

Review Comment:
   ```suggestion
           let arg_types = std::slice::from_ref(&self.input_type);
           let args = [ColumnarValue::Array(values[0].clone())];
           let executor = 
sedona_functions::executor::WkbExecutor::new(arg_types, &args);
   ```
   We can avoid this clone.



##########
python/sedonadb/tests/functions/test_aggregate.py:
##########
@@ -115,3 +115,63 @@ def test_st_collect_zero_input(eng):
         ) AS t(geom) WHERE false""",
         None,
     )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_polygonize_basic_triangle(eng):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM (
+            VALUES
+                ('LINESTRING (0 0, 10 0)'),
+                ('LINESTRING (10 0, 10 10)'),
+                ('LINESTRING (10 10, 0 0)')
+        ) AS t(geom)""",
+        "GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 10 10, 10 0)))",
+    )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_polygonize_with_nulls(eng):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM (
+            VALUES
+                ('LINESTRING (0 0, 10 0)'),
+                (NULL),
+                ('LINESTRING (10 0, 10 10)'),
+                (NULL),
+                ('LINESTRING (10 10, 0 0)')
+        ) AS t(geom)""",
+        "GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 10 10, 10 0)))",
+    )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_polygonize_no_polygons_formed(eng):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM (
+            VALUES
+                ('LINESTRING (0 0, 10 0)'),
+                ('LINESTRING (20 0, 30 0)')
+        ) AS t(geom)""",
+        "GEOMETRYCOLLECTION EMPTY",
+    )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_polygonize_multiple_polygons(eng):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(
+        """SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM (
+            VALUES
+                ('LINESTRING (0 0, 10 0)'),
+                ('LINESTRING (10 0, 5 10)'),
+                ('LINESTRING (5 10, 0 0)'),
+                ('LINESTRING (20 0, 30 0)'),
+                ('LINESTRING (30 0, 25 10)'),
+                ('LINESTRING (25 10, 20 0)')
+        ) AS t(geom)""",
+        "GEOMETRYCOLLECTION (POLYGON ((10 0, 0 0, 5 10, 10 0)), POLYGON ((30 
0, 20 0, 25 10, 30 0)))",
+    )

Review Comment:
   I think we should also add these cases too.
   
   ```sql
   # a single polygon
   select st_astext(st_polygonize(array[st_geomfromtext('POLYGON ((10 0, 0 0, 
10 10, 10 0))')]));
   -- GEOMETRYCOLLECTION(POLYGON((10 0,0 0,10 10,10 0)))
   
   one of each of the other geom types (e.g points, multipoint, etc) and 
linestring empty
   -- GEOMETRYCOLLECTION EMPTY
   
   # A single linestring that's really a LinearRing
   select st_astext(st_polygonize(array[st_geomfromtext('linestring(0 0, 0 1, 1 
1, 1 0, 0 0)')]));
   -- GEOMETRYCOLLECTION(POLYGON((0 0,0 1,1 1,1 0,0 0)))
   ```
   
   These are using Postgis array syntax. Since we don't have array syntax yet, 
we could do something like this to avoid having to write a bunch of those 
polygon:
   
   ```python
   @pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
   @pytest.mark.parametrize(
       ("geom", "expected"),  
   def test_st_polygonize_single_geom(eng, geom, expected):  # feel free to 
come up with a better name
       eng = eng.create_or_skip()
       eng.assert_query_result(
           f"""SELECT ST_Polygonize(ST_GeomFromText(geom)) FROM (
               VALUES
                   ('{geom}')
           ) AS t(geom)""",
           expected,
   ```
   
   Optional: It would be nice to have a few of these cases on the Rust level. 
If someone makes changes during development, it's nice to have `cargo test` 
catch buggy changes rather than waiting to occasionally `pytest`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to