2010YOUY01 commented on code in PR #17319: URL: https://github.com/apache/datafusion/pull/17319#discussion_r2306871250
########## datafusion/optimizer/src/extract_equijoin_predicate.rs: ########## @@ -112,22 +151,82 @@ impl OptimizerRule for ExtractEquijoinPredicate { } } +/// Splits an ANDed filter expression into equijoin predicates and remaining filters. +/// Returns all equijoin predicates and the remaining filters combined with AND. +/// +/// # Example +/// +/// For the expression `a.id = b.id AND a.x > 10 AND b.x > b.id`, this function will extract `a.id = b.id` as an equijoin predicate. +/// +/// It first splits the ANDed sub-expressions: +/// - expr1: a.id = b.id +/// - expr2: a.x > 10 +/// - expr3: b.x > b.id +/// +/// Then, it filters out the equijoin predicates and collects the non-equality expressions. +/// The equijoin condition is: +/// - It is an equality expression like `lhs == rhs` +/// - All column references in `lhs` are from the left schema, and all in `rhs` are from the right schema +/// +/// According to the above rule, `expr1` is the equijoin predicate, while `expr2` and `expr3` are not. +/// The function returns Ok(\[expr1\], Some(expr2 AND expr3)) fn split_eq_and_noneq_join_predicate( filter: Expr, left_schema: &DFSchema, right_schema: &DFSchema, ) -> Result<(Vec<EquijoinPredicate>, Option<Expr>)> { + split_op_and_other_join_predicates(filter, left_schema, right_schema, Operator::Eq) +} + +/// See `split_eq_and_noneq_join_predicate`'s comment for the idea. This function +/// is splitting out `is not distinct from` expressions instead of equal exprs. +/// The `is not distinct from` exprs will be returned as `EquijoinPredicate`. 
+/// +/// # Example +/// - Input: `a.id IS NOT DISTINCT FROM b.id AND a.x > 10 AND b.x > b.id` +/// - Output: Ok([a.id IS NOT DISTINCT FROM b.id], Some((a.x > 10) AND (b.x > b.id))) +/// +/// # Note +/// Callers should be cautious -- `is not distinct from` is not equivalent to +/// an equal expression; the caller is responsible for correctly setting the `nulls +/// equals nulls` property in the operator (if supported), to make the +/// transformation valid. +fn split_is_not_distinct_from_and_other_join_predicate( + filter: Expr, + left_schema: &DFSchema, + right_schema: &DFSchema, +) -> Result<(Vec<EquijoinPredicate>, Option<Expr>)> { + split_op_and_other_join_predicates( + filter, + left_schema, + right_schema, + Operator::IsNotDistinctFrom, + ) +} + +/// See comments in `split_eq_and_noneq_join_predicate` for details. +fn split_op_and_other_join_predicates( + filter: Expr, + left_schema: &DFSchema, + right_schema: &DFSchema, + operator: Operator, +) -> Result<(Vec<EquijoinPredicate>, Option<Expr>)> { + if !matches!(operator, Operator::Eq | Operator::IsNotDistinctFrom) { + return internal_err!("This function should only be used to split equal or 'IS NOT DISTINCT FROM' operator"); + } + let exprs = split_conjunction_owned(filter); + // Treat 'is not distinct from' comparision as join key in equal joins Review Comment: ```suggestion // Treat 'is not distinct from' comparison as join key in equal joins ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org