petern48 commented on code in PR #17835:
URL: https://github.com/apache/datafusion/pull/17835#discussion_r2391846955
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1436,33 +1436,59 @@ impl<S: SimplifyInfo> TreeNodeRewriter for
Simplifier<'_, S> {
// CASE WHEN true THEN A ... END --> A
// CASE WHEN X THEN A WHEN TRUE THEN B ... END --> CASE WHEN X
THEN A ELSE B END
+ // CASE WHEN false THEN A END --> NULL
+ // CASE WHEN false THEN A ELSE B END --> B
+ // CASE WHEN X THEN A WHEN false THEN B END --> CASE WHEN X THEN A
ELSE B END
Expr::Case(Case {
expr: None,
mut when_then_expr,
- else_expr: _,
+ mut else_expr,
// if let guard is not stabilized so we can't use it yet:
https://github.com/rust-lang/rust/issues/51114
// Once it's supported we can avoid searching through
when_then_expr twice in the below .any() and .position() calls
// }) if let Some(i) = when_then_expr.iter().position(|(when,
_)| is_true(when.as_ref())) => {
}) if when_then_expr
.iter()
- .any(|(when, _)| is_true(when.as_ref())) =>
+ .any(|(when, _)| is_true(when.as_ref()) ||
is_false(when.as_ref())) =>
{
- let i = when_then_expr
- .iter()
- .position(|(when, _)| is_true(when.as_ref()))
- .unwrap();
- let (_, then_) = when_then_expr.swap_remove(i);
- // CASE WHEN true THEN A ... END --> A
- if i == 0 {
- return Ok(Transformed::yes(*then_));
+ let mut remove_indices =
Vec::with_capacity(when_then_expr.len());
+ let out_type = info.get_data_type(&when_then_expr[0].1)?;
+ for (i, (when, _)) in when_then_expr.iter().enumerate() {
+ if is_true(when.as_ref()) {
+ let (_, then_) = when_then_expr.swap_remove(i);
+
+ // CASE WHEN X THEN A WHEN TRUE THEN B ... END -->
CASE WHEN X THEN A ELSE B END
+ when_then_expr.truncate(i);
+ else_expr = Some(then_);
+ break;
+ }
+ // CASE WHEN false THEN A
+ if is_false(when.as_ref()) {
+ remove_indices.push(i);
+ }
+ }
+
+ // Remove any CASE false statements
+ for i in remove_indices.iter().rev() {
+ when_then_expr.remove(*i);
+ }
Review Comment:
I changed the logic to instead move all valid entries into a new
`new_when_then_expr` vector. Do you think this is better? I figured it should
at least be more predictable, since it performs at most O(n) moves, whereas the
old deletion logic can call `remove` several times, each of which is an O(n)
left shift. I believe that would technically be O(n^2) in the worst case.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]