gokselk commented on code in PR #13805:
URL: https://github.com/apache/datafusion/pull/13805#discussion_r1888154643
##########
datafusion/physical-expr/src/equivalence/properties.rs:
##########
@@ -1828,22 +1831,34 @@ fn calculate_union_binary(
}
// First, calculate valid constants for the union. An expression is
constant
- // at the output of the union if it is constant in both sides.
- let constants: Vec<_> = lhs
+ // at the output of the union if it is constant in both sides with
matching values.
+ let constants = lhs
.constants()
.iter()
- .filter(|const_expr| const_exprs_contains(rhs.constants(),
const_expr.expr()))
- .map(|const_expr| {
- // TODO: When both sides have a constant column, and the actual
- // constant value is the same, then the output properties could
- // reflect the constant is valid across all partitions. However we
- // don't track the actual value that the ConstExpr takes on, so we
- // can't determine that yet
-
ConstExpr::new(Arc::clone(const_expr.expr())).with_across_partitions(false)
+ .filter_map(|lhs_const| {
+ // Find matching constant expression in RHS
+ rhs.constants()
+ .iter()
+ .find(|rhs_const| rhs_const.expr().eq(lhs_const.expr()))
+ .map(|rhs_const| {
+ let mut const_expr =
ConstExpr::new(Arc::clone(lhs_const.expr()));
+
+ // If both sides have matching constant values, preserve
the value and set across_partitions=true
+ if let (Some(lhs_val), Some(rhs_val)) =
+ (lhs_const.value(), rhs_const.value())
+ {
+ if lhs_val == rhs_val {
+ const_expr = const_expr
+ .with_across_partitions(true)
+ .with_value(lhs_val.clone());
+ }
+ }
+ const_expr
+ })
})
- .collect();
+ .collect::<Vec<_>>();
- // remove any constants that are shared in both outputs (avoid double
counting them)
+ // Remove any constants that are shared in both outputs (avoid double
counting them)
for c in &constants {
lhs = lhs.remove_constant(c);
rhs = rhs.remove_constant(c);
Review Comment:
The constant removal loop was unnecessary even in the original code. The
function already prevents double-counting by:
1. First collecting only the constants that exist in both LHS and RHS into a
filtered `constants` vector
2. Using only this filtered `constants` vector to create the final result
via `with_constants()`
3. While `add_satisfied_orderings()` uses the original constant sets from
LHS and RHS, this is correct because it's only checking if orderings from one
side are satisfied in the other side. Having extra constants in the original
sides doesn't affect this check.
So modifying `lhs` and `rhs` by removing constants has no effect on the
final result, as these modified properties aren't used in any way that would
cause double-counting. The comment about "avoiding double counting" was likely
added as a defensive measure.
##########
datafusion/physical-expr/src/equivalence/properties.rs:
##########
@@ -1828,22 +1831,34 @@ fn calculate_union_binary(
}
// First, calculate valid constants for the union. An expression is
constant
- // at the output of the union if it is constant in both sides.
- let constants: Vec<_> = lhs
+ // at the output of the union if it is constant in both sides with
matching values.
+ let constants = lhs
.constants()
.iter()
- .filter(|const_expr| const_exprs_contains(rhs.constants(),
const_expr.expr()))
- .map(|const_expr| {
- // TODO: When both sides have a constant column, and the actual
- // constant value is the same, then the output properties could
- // reflect the constant is valid across all partitions. However we
- // don't track the actual value that the ConstExpr takes on, so we
- // can't determine that yet
-
ConstExpr::new(Arc::clone(const_expr.expr())).with_across_partitions(false)
+ .filter_map(|lhs_const| {
+ // Find matching constant expression in RHS
+ rhs.constants()
+ .iter()
+ .find(|rhs_const| rhs_const.expr().eq(lhs_const.expr()))
+ .map(|rhs_const| {
+ let mut const_expr =
ConstExpr::new(Arc::clone(lhs_const.expr()));
+
+ // If both sides have matching constant values, preserve
the value and set across_partitions=true
+ if let (Some(lhs_val), Some(rhs_val)) =
+ (lhs_const.value(), rhs_const.value())
+ {
+ if lhs_val == rhs_val {
+ const_expr = const_expr
+ .with_across_partitions(true)
+ .with_value(lhs_val.clone());
+ }
+ }
+ const_expr
+ })
})
- .collect();
+ .collect::<Vec<_>>();
- // remove any constants that are shared in both outputs (avoid double
counting them)
+ // Remove any constants that are shared in both outputs (avoid double
counting them)
for c in &constants {
lhs = lhs.remove_constant(c);
rhs = rhs.remove_constant(c);
Review Comment:
The constant removal loop was unnecessary even in the original code. The
function already prevents double-counting by:
1. First collecting only the constants that exist in both LHS and RHS into a
filtered `constants` vector
2. Using only this filtered `constants` vector to create the final result
via `with_constants()`
3. While `add_satisfied_orderings()` uses the original constant sets from
LHS and RHS, this is correct because it's only checking if orderings from one
side are satisfied in the other side. Having extra constants in the original
sides doesn't affect this check
So modifying `lhs` and `rhs` by removing constants has no effect on the
final result, as these modified properties aren't used in any way that would
cause double-counting. The comment about "avoiding double counting" was likely
added as a defensive measure.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]