Copilot commented on code in PR #510:
URL: https://github.com/apache/sedona-db/pull/510#discussion_r2693009365


##########
rust/sedona-functions/src/st_envelope_agg.rs:
##########
@@ -178,12 +192,167 @@ impl Accumulator for BoundsAccumulator2D {
     }
 }
 
+#[derive(Debug)]
+struct BoundsGroupsAccumulator2D {
+    input_type: SedonaType,
+    xs: Vec<Interval>,
+    ys: Vec<Interval>,
+    offset: usize,
+}
+
+impl BoundsGroupsAccumulator2D {
+    pub fn new(input_type: SedonaType) -> Self {
+        Self {
+            input_type,
+            xs: Vec::new(),
+            ys: Vec::new(),
+            offset: 0,
+        }
+    }
+
+    fn execute_update(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        // Check some of our assumptions about how this will be called
+        debug_assert_eq!(self.offset, 0);
+        debug_assert_eq!(values.len(), 1);
+        debug_assert_eq!(values[0].len(), group_indices.len());
+        if let Some(filter) = opt_filter {
+            debug_assert_eq!(values[0].len(), filter.len());
+        }
+
+        let arg_types = [self.input_type.clone()];
+        let args = [ColumnarValue::Array(values[0].clone())];
+        let executor = WkbExecutor::new(&arg_types, &args);
+        self.xs.resize(total_num_groups, Interval::empty());
+        self.ys.resize(total_num_groups, Interval::empty());
+        let mut i = 0;
+
+        if let Some(filter) = opt_filter {
+            let mut filter_iter = filter.iter();
+            executor.execute_wkb_void(|maybe_item| {
+                if filter_iter.next().unwrap().unwrap_or(false) {
+                    let group_id = group_indices[i];
+                    i += 1;
+                    if let Some(item) = maybe_item {
+                        geo_traits_update_xy_bounds(
+                            item,
+                            &mut self.xs[group_id],
+                            &mut self.ys[group_id],
+                        )
+                        .map_err(|e| DataFusionError::External(Box::new(e)))?;
+                    }
+                } else {
+                    i += 1;
+                }
+
+                Ok(())
+            })?;
+        } else {
+            executor.execute_wkb_void(|maybe_item| {
+                let group_id = group_indices[i];
+                i += 1;
+                if let Some(item) = maybe_item {
+                    geo_traits_update_xy_bounds(
+                        item,
+                        &mut self.xs[group_id],
+                        &mut self.ys[group_id],
+                    )
+                    .map_err(|e| DataFusionError::External(Box::new(e)))?;
+                }
+
+                Ok(())
+            })?;
+        }
+
+        Ok(())
+    }
+
+    fn emit_wkb_result(&mut self, emit_to: EmitTo) -> Result<ArrayRef> {
+        let emit_size = match emit_to {
+            EmitTo::All => self.xs.len(),
+            EmitTo::First(n) => n,
+        };
+
+        let mut builder =
+            BinaryBuilder::with_capacity(emit_size, emit_size * 
WKB_MIN_PROBABLE_BYTES);
+
+        let emit_range = self.offset..(self.offset + emit_size);
+        for (x, y) in zip(&self.xs[emit_range.clone()], 
&self.ys[emit_range.clone()]) {
+            let written = write_envelope(&(*x).into(), y, &mut builder)?;
+            if written {
+                builder.append_value([]);
+            } else {
+                builder.append_null();
+            }
+        }
+
+        match emit_to {
+            EmitTo::All => {
+                self.xs = Vec::new();
+                self.ys = Vec::new();
+                self.offset = 0;
+            }
+            EmitTo::First(n) => {
+                self.offset += n;
+            }
+        }
+
+        Ok(Arc::new(builder.finish()))
+    }
+}
+
+impl GroupsAccumulator for BoundsGroupsAccumulator2D {
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        self.execute_update(values, group_indices, opt_filter, 
total_num_groups)
+    }
+
+    fn state(&mut self, emit_to: EmitTo) -> Result<Vec<ArrayRef>> {
+        Ok(vec![self.emit_wkb_result(emit_to)?])
+    }
+
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        // In this case, our state is identical to our input values
+        self.execute_update(values, group_indices, opt_filter, 
total_num_groups)
+    }
+
+    fn evaluate(&mut self, emit_to: EmitTo) -> Result<ArrayRef> {
+        self.emit_wkb_result(emit_to)
+    }
+
+    fn size(&self) -> usize {
+        size_of::<BoundsAccumulator2D>()

Review Comment:
   The size calculation should use `size_of::<BoundsGroupsAccumulator2D>()` 
instead of `size_of::<BoundsAccumulator2D>()` since this is the `size()` method 
for `BoundsGroupsAccumulator2D`, not `BoundsAccumulator2D`.
   ```suggestion
           size_of::<BoundsGroupsAccumulator2D>()
   ```



##########
rust/sedona-functions/src/st_envelope_agg.rs:
##########
@@ -178,12 +192,167 @@ impl Accumulator for BoundsAccumulator2D {
     }
 }
 
+#[derive(Debug)]
+struct BoundsGroupsAccumulator2D {
+    input_type: SedonaType,
+    xs: Vec<Interval>,
+    ys: Vec<Interval>,
+    offset: usize,
+}
+
+impl BoundsGroupsAccumulator2D {
+    pub fn new(input_type: SedonaType) -> Self {
+        Self {
+            input_type,
+            xs: Vec::new(),
+            ys: Vec::new(),
+            offset: 0,
+        }
+    }
+
+    fn execute_update(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        // Check some of our assumptions about how this will be called
+        debug_assert_eq!(self.offset, 0);
+        debug_assert_eq!(values.len(), 1);
+        debug_assert_eq!(values[0].len(), group_indices.len());
+        if let Some(filter) = opt_filter {
+            debug_assert_eq!(values[0].len(), filter.len());
+        }
+
+        let arg_types = [self.input_type.clone()];
+        let args = [ColumnarValue::Array(values[0].clone())];
+        let executor = WkbExecutor::new(&arg_types, &args);
+        self.xs.resize(total_num_groups, Interval::empty());
+        self.ys.resize(total_num_groups, Interval::empty());
+        let mut i = 0;
+
+        if let Some(filter) = opt_filter {
+            let mut filter_iter = filter.iter();
+            executor.execute_wkb_void(|maybe_item| {
+                if filter_iter.next().unwrap().unwrap_or(false) {
+                    let group_id = group_indices[i];
+                    i += 1;
+                    if let Some(item) = maybe_item {
+                        geo_traits_update_xy_bounds(
+                            item,
+                            &mut self.xs[group_id],
+                            &mut self.ys[group_id],
+                        )
+                        .map_err(|e| DataFusionError::External(Box::new(e)))?;
+                    }
+                } else {
+                    i += 1;
+                }
+
+                Ok(())
+            })?;
+        } else {
+            executor.execute_wkb_void(|maybe_item| {
+                let group_id = group_indices[i];
+                i += 1;
+                if let Some(item) = maybe_item {
+                    geo_traits_update_xy_bounds(
+                        item,
+                        &mut self.xs[group_id],
+                        &mut self.ys[group_id],
+                    )
+                    .map_err(|e| DataFusionError::External(Box::new(e)))?;
+                }
+
+                Ok(())
+            })?;
+        }
+
+        Ok(())
+    }
+
+    fn emit_wkb_result(&mut self, emit_to: EmitTo) -> Result<ArrayRef> {
+        let emit_size = match emit_to {
+            EmitTo::All => self.xs.len(),
+            EmitTo::First(n) => n,
+        };
+
+        let mut builder =
+            BinaryBuilder::with_capacity(emit_size, emit_size * 
WKB_MIN_PROBABLE_BYTES);
+
+        let emit_range = self.offset..(self.offset + emit_size);
+        for (x, y) in zip(&self.xs[emit_range.clone()], 
&self.ys[emit_range.clone()]) {
+            let written = write_envelope(&(*x).into(), y, &mut builder)?;
+            if written {
+                builder.append_value([]);
+            } else {

Review Comment:
   Appending an empty byte array when written is true appears incorrect. The 
`write_envelope` function should write the envelope data to the builder, so 
this line should likely be removed. If the envelope was written successfully, 
no additional append operation is needed.
   ```suggestion
               if !written {
   ```



##########
python/sedonadb/tests/functions/test_aggregate.py:
##########
@@ -16,14 +16,126 @@
 # under the License.
 
 import pytest
+import shapely
 from sedonadb.testing import PostGIS, SedonaDB
 
 
+# Aggregate functions don't have a suffix in PostGIS
 def agg_fn_suffix(eng):
-    """Return the appropriate suffix for the aggregate function for the given 
engine."""
     return "" if isinstance(eng, PostGIS) else "_Agg"
 
 
+# ST_Envelope is not an aggregate function in PostGIS but we can check
+# behaviour using ST_Envelope(ST_Collect(...))
+def call_st_envelope_agg(eng, arg):
+    if isinstance(eng, PostGIS):
+        return f"ST_Envelope(ST_Collect({arg}))"
+    else:
+        return f"ST_Envelope_Agg({arg})"
+
+
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_envelope_agg_points(eng):
+    eng = eng.create_or_skip()
+
+    eng.assert_query_result(
+        f"""SELECT {call_st_envelope_agg(eng, "ST_GeomFromText(geom)")} FROM (
+            VALUES
+                ('POINT (1 2)'),
+                ('POINT (3 4)'),
+                (NULL)
+        ) AS t(geom)""",
+        "POLYGON ((1 2, 1 4, 3 4, 3 2, 1 2))",
+    )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_envelope_agg_all_null(eng):
+    eng = eng.create_or_skip()
+
+    eng.assert_query_result(
+        f"""SELECT {call_st_envelope_agg(eng, "ST_GeomFromText(geom)")} FROM (
+            VALUES
+                (NULL),
+                (NULL),
+                (NULL)
+        ) AS t(geom)""",
+        None,
+    )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_envelope_agg_zero_input(eng):
+    eng = eng.create_or_skip()
+
+    eng.assert_query_result(
+        f"""SELECT {call_st_envelope_agg(eng, "ST_GeomFromText(geom)")} AS 
empty FROM (
+            VALUES
+                ('POINT (1 2)')
+        ) AS t(geom) WHERE false""",
+        None,
+    )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_envelope_agg_single_point(eng):
+    eng = eng.create_or_skip()
+
+    eng.assert_query_result(
+        f"""SELECT {call_st_envelope_agg(eng, "ST_GeomFromText(geom)")} FROM (
+            VALUES ('POINT (5 5)')
+        ) AS t(geom)""",
+        "POINT (5 5)",
+    )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_envelope_agg_collinear_points(eng):
+    eng = eng.create_or_skip()
+
+    eng.assert_query_result(
+        f"""SELECT {call_st_envelope_agg(eng, "ST_GeomFromText(geom)")} FROM (
+            VALUES
+                ('POINT (0 0)'),
+                ('POINT (0 1)'),
+                ('POINT (0 2)')
+        ) AS t(geom)""",
+        "LINESTRING (0 0, 0 2)",
+    )
+
+
[email protected]("eng", [SedonaDB, PostGIS])
+def test_st_envelope_agg_many_groups(eng, con):

Review Comment:
   The test function signature includes parameter `con` but this parameter is 
not provided by the `pytest.mark.parametrize` decorator. This will cause the 
test to fail when attempting to execute. Either add `con` as a pytest fixture 
or remove it if not needed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to