Dandandan commented on code in PR #15642: URL: https://github.com/apache/datafusion/pull/15642#discussion_r2033968826
########## datafusion/functions-aggregate/src/count.rs: ########## @@ -525,31 +541,52 @@ impl GroupsAccumulator for CountGroupsAccumulator { assert_eq!(partial_counts.null_count(), 0); let partial_counts = partial_counts.values(); + let num_blocks = total_num_groups / BLOCK_SIZE + 1; + let num_values = total_num_groups % BLOCK_SIZE; + + self.counts + .resize_with(num_blocks, || Vec::with_capacity(BLOCK_SIZE)); + + let prev_blocks = self.counts.len(); + let new_blocks = num_blocks - prev_blocks; + + self.counts + [prev_blocks.saturating_sub(1)..prev_blocks.saturating_sub(1) + new_blocks] + .iter_mut() + .for_each(|block| block.resize(BLOCK_SIZE, 0)); + + self.counts[num_blocks - 1].resize_with(num_values, || 0); + // Adds the counts with the partial counts - self.counts.resize(total_num_groups, 0); group_indices.iter().zip(partial_counts.iter()).for_each( |(&group_index, partial_count)| { - self.counts[group_index] += partial_count; + let block = group_index / BLOCK_SIZE; + let index = group_index % BLOCK_SIZE; + + self.counts[block][index] += partial_count; }, ); Ok(()) } fn evaluate(&mut self, emit_to: EmitTo) -> Result<ArrayRef> { - let counts = emit_to.take_needed(&mut self.counts); + let counts = emit_to.take_needed_blocks(&mut self.counts); // Count is always non null (null inputs just don't contribute to the overall values) - let nulls = None; - let array = PrimitiveArray::<Int64Type>::new(counts.into(), nulls); + // TODO: support emitting batches Review Comment: `evaluate` and `state` could be changed to return `Result<Vec<ArrayRef>>` and `Result<Vec<Vec<ArrayRef>>>` respectively, although this would be a fairly large breaking change. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org