adriangb commented on code in PR #21623:
URL: https://github.com/apache/datafusion/pull/21623#discussion_r3142620864
##########
datafusion/physical-expr/src/expressions/binary.rs:
##########
@@ -604,6 +604,177 @@ impl PhysicalExpr for BinaryExpr {
write!(f, " {} ", self.op)?;
write_child(f, self.right.as_ref(), precedence)
}
+
+ fn is_null(&self, null_columns: &std::collections::HashSet<usize>) ->
Option<bool> {
+ match self.op {
+ // Comparisons and arithmetic: NULL if either child is NULL
+ Operator::Eq
+ | Operator::NotEq
+ | Operator::Lt
+ | Operator::LtEq
+ | Operator::Gt
+ | Operator::GtEq
+ | Operator::Plus
+ | Operator::Minus
+ | Operator::Multiply
+ | Operator::Divide
+ | Operator::Modulo
+ | Operator::StringConcat
+ | Operator::LikeMatch
+ | Operator::ILikeMatch
+ | Operator::NotLikeMatch
+ | Operator::NotILikeMatch
+ | Operator::RegexMatch
+ | Operator::RegexIMatch
+ | Operator::RegexNotMatch
+ | Operator::RegexNotIMatch
+ | Operator::BitwiseAnd
+ | Operator::BitwiseOr
+ | Operator::BitwiseXor
+ | Operator::BitwiseShiftLeft
+ | Operator::BitwiseShiftRight
+ | Operator::AtArrow
+ | Operator::ArrowAt => {
+ match (
+ self.left.is_null(null_columns),
+ self.right.is_null(null_columns),
+ ) {
+ (Some(true), _) | (_, Some(true)) => Some(true),
+ (Some(false), Some(false)) => Some(false),
+ _ => None,
+ }
+ }
+ // IS DISTINCT FROM / IS NOT DISTINCT FROM never return NULL
+ Operator::IsDistinctFrom | Operator::IsNotDistinctFrom =>
Some(false),
+ // AND: NULL only when neither side is FALSE and at least one is NULL
+ // FALSE AND NULL = FALSE (not null)
Review Comment:
worth adding:
```suggestion
// FALSE AND NULL = FALSE (not null)
// NULL AND FALSE = FALSE (not null)
```
##########
datafusion/physical-expr-common/src/physical_expr.rs:
##########
@@ -438,6 +439,32 @@ pub trait PhysicalExpr: Any + Send + Sync + Display +
Debug + DynEq + DynHash {
fn placement(&self) -> ExpressionPlacement {
ExpressionPlacement::KeepInPlace
}
+
+ /// Returns whether this expression is guaranteed to evaluate to `NULL`
+ /// when all columns in `null_columns` are `NULL`.
+ ///
+ /// `null_columns` contains the column indices (in the input schema) that
+ /// are assumed to be `NULL`.
+ ///
+ /// - `Some(true)`: definitely evaluates to `NULL`
+ /// - `Some(false)`: definitely does NOT evaluate to `NULL`
+ /// - `None`: unknown (conservative default)
+ fn is_null(&self, _null_columns: &HashSet<usize>) -> Option<bool> {
+ None
+ }
+
+ /// Returns whether this expression is guaranteed to be not-true
+ /// (i.e., evaluates to `NULL` or `FALSE`).
+ ///
+ /// `null_columns` contains the column indices (in the input schema) that
+ /// are assumed to be `NULL`.
+ ///
+ /// - `Some(true)`: definitely evaluates to `NULL` or `FALSE`
Review Comment:
```suggestion
/// - `Some(true)`: some rows may evaluate to `NULL` or `FALSE`
```
I assume the point is not that this expression is *always* `NULL`/`FALSE`
for any input, but rather that it *may* sometimes be `NULL`/`FALSE`?
If this distinction may be useful, should we make an enum?
```rust
enum IsFalsy {
/// Falsy (`false` or `null`) for any input
Always,
/// May be `false` or `null` for some inputs but not others.
/// This is the conservative default.
Sometimes,
/// Never falsy
Never,
}
```
These are the same 3 states but more explicitly spelled out IMO.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]