Rachelint commented on code in PR #15591:
URL: https://github.com/apache/datafusion/pull/15591#discussion_r2072859439


##########
datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs:
##########
@@ -198,4 +232,28 @@ where
     fn size(&self) -> usize {
         self.values.capacity() * size_of::<T::Native>() + self.null_state.size()
     }
+
+    fn supports_blocked_groups(&self) -> bool {
+        true
+    }
+
+    fn alter_block_size(&mut self, block_size: Option<usize>) -> Result<()> {
+        self.values.clear();
+        self.null_state = NullStateAdapter::new(block_size);
+        self.block_size = block_size;
+
+        Ok(())
+    }
+}
+
+impl<N: ArrowNativeTypeOp> Block for Vec<N> {

Review Comment:
   Addressed.



##########
datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs:
##########
@@ -81,28 +85,49 @@ hash_float!(f16, f32, f64);
 pub struct GroupValuesPrimitive<T: ArrowPrimitiveType> {
     /// The data type of the output array
     data_type: DataType,
+
     /// Stores the group index based on the hash of its value
     ///
     /// We don't store the hashes as hashing fixed width primitives
     /// is fast enough for this not to benefit performance
-    map: HashTable<usize>,
+    map: HashTable<u64>,
+
     /// The group index of the null value if any
-    null_group: Option<usize>,
+    null_group: Option<u64>,
+
     /// The values for each group index
-    values: Vec<T::Native>,
+    values: VecDeque<Vec<T::Native>>,
+
     /// The random state used to generate hashes
     random_state: RandomState,
+
+    /// Block size of the current `GroupValues`, if any:
+    ///   - If `None`, it means block optimization is disabled,
+    ///     all `group values` will be stored in a single `Vec`
+    ///
+    ///   - If `Some(blk_size)`, it means block optimization is enabled,
+    ///     `group values` will be stored in multiple `Vec`s, and each
+    ///     `Vec` is of `blk_size` len, and we call it a `block`
+    ///
+    block_size: Option<usize>,

Review Comment:
   Addressed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to