Dandandan commented on code in PR #15642:
URL: https://github.com/apache/datafusion/pull/15642#discussion_r2033968826


##########
datafusion/functions-aggregate/src/count.rs:
##########
@@ -525,31 +541,52 @@ impl GroupsAccumulator for CountGroupsAccumulator {
         assert_eq!(partial_counts.null_count(), 0);
         let partial_counts = partial_counts.values();
 
+        let num_blocks = total_num_groups / BLOCK_SIZE + 1;
+        let num_values = total_num_groups % BLOCK_SIZE;
+
+        self.counts
+            .resize_with(num_blocks, || Vec::with_capacity(BLOCK_SIZE));
+
+        let prev_blocks = self.counts.len();
+        let new_blocks = num_blocks - prev_blocks;
+
+        self.counts
+            [prev_blocks.saturating_sub(1)..prev_blocks.saturating_sub(1) + 
new_blocks]
+            .iter_mut()
+            .for_each(|block| block.resize(BLOCK_SIZE, 0));
+
+        self.counts[num_blocks - 1].resize_with(num_values, || 0);
+
         // Adds the counts with the partial counts
-        self.counts.resize(total_num_groups, 0);
         group_indices.iter().zip(partial_counts.iter()).for_each(
             |(&group_index, partial_count)| {
-                self.counts[group_index] += partial_count;
+                let block = group_index / BLOCK_SIZE;
+                let index = group_index % BLOCK_SIZE;
+
+                self.counts[block][index] += partial_count;
             },
         );
 
         Ok(())
     }
 
     fn evaluate(&mut self, emit_to: EmitTo) -> Result<ArrayRef> {
-        let counts = emit_to.take_needed(&mut self.counts);
+        let counts = emit_to.take_needed_blocks(&mut self.counts);
 
         // Count is always non null (null inputs just don't contribute to the 
overall values)
-        let nulls = None;
-        let array = PrimitiveArray::<Int64Type>::new(counts.into(), nulls);
+        // TODO: support emitting batches

Review Comment:
   `evaluate` and `state` could be changed to return `Result<Vec<ArrayRef>>` 
and `Result<Vec<Vec<ArrayRef>>>` respectively, although that would be a fairly 
large breaking change.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to