Rachelint commented on code in PR #15591: URL: https://github.com/apache/datafusion/pull/15591#discussion_r2072859439
########## datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs: ########## @@ -198,4 +232,28 @@ where fn size(&self) -> usize { self.values.capacity() * size_of::<T::Native>() + self.null_state.size() } + + fn supports_blocked_groups(&self) -> bool { + true + } + + fn alter_block_size(&mut self, block_size: Option<usize>) -> Result<()> { + self.values.clear(); + self.null_state = NullStateAdapter::new(block_size); + self.block_size = block_size; + + Ok(()) + } +} + +impl<N: ArrowNativeTypeOp> Block for Vec<N> { Review Comment: Addressed. ########## datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs: ########## @@ -81,28 +85,49 @@ hash_float!(f16, f32, f64); pub struct GroupValuesPrimitive<T: ArrowPrimitiveType> { /// The data type of the output array data_type: DataType, + /// Stores the group index based on the hash of its value /// /// We don't store the hashes as hashing fixed width primitives /// is fast enough for this not to benefit performance - map: HashTable<usize>, + map: HashTable<u64>, + /// The group index of the null value if any - null_group: Option<usize>, + null_group: Option<u64>, + /// The values for each group index - values: Vec<T::Native>, + values: VecDeque<Vec<T::Native>>, + /// The random state used to generate hashes random_state: RandomState, + + /// Block size of current `GroupValues` if it exists: + /// - If `None`, it means block optimization is disabled, + /// all `group values` will be stored in a single `Vec` + /// + /// - If `Some(blk_size)`, it means block optimization is enabled, + /// `group values` will be stored in multiple `Vec`s, and each + /// `Vec` is of `blk_size` len, and we call it a `block` + /// + block_size: Option<usize>, Review Comment: Addressed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org