eejbyfeldt commented on code in PR #12571:
URL: https://github.com/apache/datafusion/pull/12571#discussion_r1775739407
##########
datafusion/physical-plan/src/aggregates/mod.rs:
##########
@@ -210,13 +221,114 @@ impl PhysicalGroupBy {
.collect()
}
+ /// The number of expressions in the output schema.
+ fn num_output_exprs(&self, mode: &AggregateMode) -> usize {
+ let mut num_exprs = self.expr.len();
+ if !self.is_single() {
+ num_exprs += self.num_internal_exprs;
+ }
+ if *mode != AggregateMode::Partial {
+ num_exprs -= self.num_internal_exprs;
+ }
+ num_exprs
+ }
+
/// Return grouping expressions as they occur in the output schema.
- pub fn output_exprs(&self) -> Vec<Arc<dyn PhysicalExpr>> {
- self.expr
- .iter()
- .enumerate()
- .map(|(index, (_, name))| Arc::new(Column::new(name, index)) as _)
- .collect()
+ pub fn output_exprs(&self, mode: &AggregateMode) -> Vec<Arc<dyn
PhysicalExpr>> {
+ let num_output_exprs = self.num_output_exprs(mode);
+ let mut output_exprs = Vec::with_capacity(num_output_exprs);
+ output_exprs.extend(
+ self.expr
+ .iter()
+ .enumerate()
+ .take(num_output_exprs)
+ .map(|(index, (_, name))| Arc::new(Column::new(name, index))
as _),
+ );
+ if !self.is_single() && *mode == AggregateMode::Partial {
+ output_exprs
+ .push(Arc::new(Column::new(INTERNAL_GROUPING_ID,
self.expr.len())) as _);
+ }
+ output_exprs
+ }
+
+ /// Returns the number expression as grouping keys.
+ fn num_group_exprs(&self) -> usize {
+ if self.is_single() {
+ self.expr.len()
+ } else {
+ self.expr.len() + self.num_internal_exprs
+ }
+ }
+
+ /// Returns the data type of the grouping id.
Review Comment:
Added your comment.
The only reason to implement as a bitmask is if we plan to follow up by
implementing the grouping function
(https://github.com/apache/datafusion/issues/5647) on top of that column.
Otherwise it might be better to make the id just be a sequential number. (It
would actually be better in some ways it would also fix
https://github.com/apache/datafusion/issues/5672)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]