berkaysynnada commented on code in PR #13805:
URL: https://github.com/apache/datafusion/pull/13805#discussion_r1888115043
##########
datafusion/physical-expr/src/equivalence/properties.rs:
##########
@@ -1828,22 +1831,34 @@ fn calculate_union_binary(
}
// First, calculate valid constants for the union. An expression is
constant
- // at the output of the union if it is constant in both sides.
- let constants: Vec<_> = lhs
+ // at the output of the union if it is constant in both sides with
matching values.
+ let constants = lhs
.constants()
.iter()
- .filter(|const_expr| const_exprs_contains(rhs.constants(),
const_expr.expr()))
- .map(|const_expr| {
- // TODO: When both sides have a constant column, and the actual
- // constant value is the same, then the output properties could
- // reflect the constant is valid across all partitions. However we
- // don't track the actual value that the ConstExpr takes on, so we
- // can't determine that yet
-
ConstExpr::new(Arc::clone(const_expr.expr())).with_across_partitions(false)
+ .filter_map(|lhs_const| {
+ // Find matching constant expression in RHS
+ rhs.constants()
+ .iter()
+ .find(|rhs_const| rhs_const.expr().eq(lhs_const.expr()))
+ .map(|rhs_const| {
+ let mut const_expr =
ConstExpr::new(Arc::clone(lhs_const.expr()));
+
+ // If both sides have matching constant values, preserve
the value and set across_partitions=true
+ if let (Some(lhs_val), Some(rhs_val)) =
+ (lhs_const.value(), rhs_const.value())
+ {
+ if lhs_val == rhs_val {
+ const_expr = const_expr
+ .with_across_partitions(true)
+ .with_value(lhs_val.clone());
+ }
+ }
+ const_expr
+ })
})
- .collect();
+ .collect::<Vec<_>>();
- // remove any constants that are shared in both outputs (avoid double
counting them)
+ // Remove any constants that are shared in both outputs (avoid double
counting them)
for c in &constants {
lhs = lhs.remove_constant(c);
rhs = rhs.remove_constant(c);
Review Comment:
Can you understand why they were exist, and did they become redundant with
this PR? They could do some work which does not appear at the tests. Maybe you
can put some debug_asserts() to ensure we are not double counting (what the
comment says)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]