Dandandan commented on code in PR #13249:
URL: https://github.com/apache/datafusion/pull/13249#discussion_r1827792383


##########
datafusion/optimizer/src/eliminate_outer_join.rs:
##########
@@ -56,6 +59,138 @@ impl EliminateOuterJoin {
     pub fn new() -> Self {
         Self {}
     }
+
+    fn eliminate_outer_join(
+        &self,
+        plan: LogicalPlan,
+    ) -> Result<Transformed<LogicalPlan>> {
+        let LogicalPlan::Filter(mut filter) = plan else {
+            return Ok(Transformed::no(plan));
+        };
+
+        let mut join = match Arc::unwrap_or_clone(filter.input) {
+            LogicalPlan::Join(join) if join.join_type.is_outer() => join,
+            other_input => {
+                filter.input = Arc::new(other_input);
+                return Ok(Transformed::no(LogicalPlan::Filter(filter)));
+            }
+        };
+
+        let join_type = join.join_type;
+        let predicates = split_conjunction_owned(filter.predicate.clone());
+        let left_columns = schema_columns(join.left.schema());
+        let right_columns = schema_columns(join.right.schema());
+        let join_schema = &join.schema;
+
+        let new_join_type = match join_type {
+            JoinType::Left => {
+                eliminate_left_outer_join(predicates, &right_columns, join_schema)
+            }
+            JoinType::Right => {
+                eliminate_right_outer_join(predicates, &left_columns, join_schema)
+            }
+            JoinType::Full => eliminate_full_outer_join(
+                predicates,
+                &left_columns,
+                &right_columns,
+                join_schema,
+            ),
+            others => unreachable!("{}", others),
+        };
+
+        join.join_type = new_join_type;
+        filter.input = Arc::new(LogicalPlan::Join(join));
+
+        Ok(Transformed::yes(LogicalPlan::Filter(filter)))
+    }
+}
+
+fn eliminate_left_outer_join(
+    predicates: Vec<Expr>,
+    right_columns: &HashSet<Column>,
+    join_schema: &DFSchema,
+) -> JoinType {
+    for predicate in predicates {
+        let columns = predicate.column_refs();
+        if has_bound_filter(&predicate, &columns, right_columns, join_schema) {
+            return JoinType::Inner;
+        }
+    }
+    JoinType::Left
+}
+
+fn eliminate_right_outer_join(
+    predicates: Vec<Expr>,
+    left_columns: &HashSet<Column>,
+    join_schema: &DFSchema,
+) -> JoinType {
+    for predicate in predicates {
+        let columns = predicate.column_refs();
+        if has_bound_filter(&predicate, &columns, left_columns, join_schema) {
+            return JoinType::Inner;
+        }
+    }
+    JoinType::Right
+}
+
+fn eliminate_full_outer_join(
+    predicates: Vec<Expr>,
+    left_columns: &HashSet<Column>,
+    right_columns: &HashSet<Column>,
+    join_schema: &DFSchema,
+) -> JoinType {
+    let mut left_exist = false;
+    let mut right_exist = false;
+    for predicate in predicates {
+        let columns = predicate.column_refs();
+
+        if !left_exist
+            && has_bound_filter(&predicate, &columns, left_columns, join_schema)
+        {
+            left_exist = true;
+        }
+
+        if !right_exist
+            && has_bound_filter(&predicate, &columns, right_columns, join_schema)
+        {
+            right_exist = true;
+        }
+
+        if left_exist && right_exist {
+            break;
+        }
+    }
+
+    if left_exist && right_exist {
+        JoinType::Inner
+    } else if left_exist {
+        JoinType::Left
+    } else if right_exist {
+        JoinType::Right
+    } else {
+        JoinType::Full
+    }
+}
+
+fn has_bound_filter(
+    predicate: &Expr,
+    predicate_columns: &HashSet<&Column>,
+    child_schema_columns: &HashSet<Column>,
+    join_schema: &DFSchema,
+) -> bool {
+    let predicate_cloned = predicate.clone();
+    let cols = predicate_columns
+        .iter()
+        .filter(|col| child_schema_columns.contains(*col))
+        .cloned();
+    is_restrict_null_predicate(join_schema, predicate_cloned, cols).unwrap_or(false)

Review Comment:
   Interesting find that this was already available 🥳 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to