alamb commented on code in PR #10963:
URL: https://github.com/apache/datafusion/pull/10963#discussion_r1645016269
##########
datafusion/optimizer/src/propagate_empty_relation.rs:
##########
@@ -400,6 +433,182 @@ mod tests {
assert_together_optimized_plan_eq(plan, expected)
}
+ #[test]
+ fn test_left_join_empty_left_table() -> Result<()> {
+ let left_table_scan = test_table_scan()?;
+ let left = LogicalPlanBuilder::from(left_table_scan)
+ .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+ .build()?;
+
+ let right_table_scan = test_table_scan_with_name("test2")?;
+ let right = LogicalPlanBuilder::from(right_table_scan).build()?;
+
+ let plan = LogicalPlanBuilder::from(left)
+ .join_using(
+ right,
+ JoinType::Left,
+ vec![Column::from_name("a".to_string())],
+ )?
+ .build()?;
+
+ let expected = "EmptyRelation";
+ assert_together_optimized_plan_eq(plan, expected)
+ }
+
+ #[test]
+ fn test_right_join_empty_right_table() -> Result<()> {
+ let left_table_scan = test_table_scan()?;
+ let left = LogicalPlanBuilder::from(left_table_scan).build()?;
+
+ let right_table_scan = test_table_scan_with_name("test2")?;
+ let right = LogicalPlanBuilder::from(right_table_scan)
+ .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+ .build()?;
+
+ let plan = LogicalPlanBuilder::from(left)
+ .join_using(
+ right,
+ JoinType::Right,
+ vec![Column::from_name("a".to_string())],
+ )?
+ .build()?;
+
+ let expected = "EmptyRelation";
+ assert_together_optimized_plan_eq(plan, expected)
+ }
+
+ #[test]
+ fn test_left_semi_join_empty_left_table() -> Result<()> {
+ let left_table_scan = test_table_scan()?;
+ let left = LogicalPlanBuilder::from(left_table_scan)
+ .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+ .build()?;
+
+ let right_table_scan = test_table_scan_with_name("test2")?;
+ let right = LogicalPlanBuilder::from(right_table_scan).build()?;
+
+ let plan = LogicalPlanBuilder::from(left)
+ .join_using(
+ right,
+ JoinType::LeftSemi,
+ vec![Column::from_name("a".to_string())],
+ )?
+ .build()?;
+
+ let expected = "EmptyRelation";
+ assert_together_optimized_plan_eq(plan, expected)
+ }
+
+ #[test]
+ fn test_left_semi_join_empty_right_table() -> Result<()> {
+ let left_table_scan = test_table_scan()?;
+ let left = LogicalPlanBuilder::from(left_table_scan).build()?;
+
+ let right_table_scan = test_table_scan_with_name("test2")?;
+ let right = LogicalPlanBuilder::from(right_table_scan)
+ .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+ .build()?;
+
+ let plan = LogicalPlanBuilder::from(left)
+ .join_using(
+ right,
+ JoinType::LeftSemi,
+ vec![Column::from_name("a".to_string())],
+ )?
+ .build()?;
+
+ let expected = "EmptyRelation";
+ assert_together_optimized_plan_eq(plan, expected)
+ }
+
+ #[test]
+ fn test_right_semi_join_empty_right_table() -> Result<()> {
+ let left_table_scan = test_table_scan()?;
+ let left = LogicalPlanBuilder::from(left_table_scan).build()?;
+
+ let right_table_scan = test_table_scan_with_name("test2")?;
+ let right = LogicalPlanBuilder::from(right_table_scan)
+ .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+ .build()?;
+
+ let plan = LogicalPlanBuilder::from(left)
+ .join_using(
+ right,
+ JoinType::RightSemi,
+ vec![Column::from_name("a".to_string())],
+ )?
+ .build()?;
+
+ let expected = "EmptyRelation";
+ assert_together_optimized_plan_eq(plan, expected)
+ }
+
+ #[test]
+ fn test_right_semi_join_empty_left_table() -> Result<()> {
+ let left_table_scan = test_table_scan()?;
+ let left = LogicalPlanBuilder::from(left_table_scan)
+ .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
+ .build()?;
+
+ let right_table_scan = test_table_scan_with_name("test2")?;
+ let right = LogicalPlanBuilder::from(right_table_scan).build()?;
+
+ let plan = LogicalPlanBuilder::from(left)
+ .join_using(
+ right,
+ JoinType::RightSemi,
+ vec![Column::from_name("a".to_string())],
+ )?
+ .build()?;
+
+ let expected = "EmptyRelation";
+ assert_together_optimized_plan_eq(plan, expected)
+ }
+
+ #[test]
+ fn test_right_anti_join_empty_right_table() -> Result<()> {
Review Comment:
Can you please also add the negative cases ? Like test right
antijoin_empty_left_table and assert it wasn't rewritten?
##########
datafusion/optimizer/src/propagate_empty_relation.rs:
##########
@@ -400,6 +433,182 @@ mod tests {
assert_together_optimized_plan_eq(plan, expected)
}
+ #[test]
Review Comment:
I think having some end to end slt would be good too, but for these rules I
think having good unit tests are key as well
##########
datafusion/optimizer/src/propagate_empty_relation.rs:
##########
@@ -94,29 +94,62 @@ impl OptimizerRule for PropagateEmptyRelation {
Ok(Transformed::no(LogicalPlan::CrossJoin(join.clone())))
}
- LogicalPlan::Join(ref join) if join.join_type == Inner => {
+ LogicalPlan::Join(ref join) => {
// TODO: For Join, more join type need to be careful:
- // For LeftOuter/LeftSemi/LeftAnti Join, only the left side is
empty, the Join result is empty.
- // For LeftSemi Join, if the right side is empty, the Join
result is empty.
// For LeftAnti Join, if the right side is empty, the Join
result is left side(should exclude null ??).
- // For RightOuter/RightSemi/RightAnti Join, only the right
side is empty, the Join result is empty.
- // For RightSemi Join, if the left side is empty, the Join
result is empty.
// For RightAnti Join, if the left side is empty, the Join
result is right side(should exclude null ??).
// For Full Join, only both sides are empty, the Join result
is empty.
// For LeftOut/Full Join, if the right side is empty, the Join
can be eliminated with a Projection with left side
// columns + right side columns replaced with null values.
// For RightOut/Full Join, if the left side is empty, the Join
can be eliminated with a Projection with right side
// columns + left side columns replaced with null values.
let (left_empty, right_empty) =
binary_plan_children_is_empty(&plan)?;
- if left_empty || right_empty {
- return Ok(Transformed::yes(LogicalPlan::EmptyRelation(
- EmptyRelation {
+
Review Comment:
I double checked that this code faithfully implements the semantics in the
comments, and I did some mental review and verification to convince myself that
these semantics are correct
i wonder if @jackwener has a moment to double check too?
##########
datafusion/sqllogictest/test_files/subquery.slt:
##########
@@ -613,10 +613,7 @@ SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM
t2 WHERE t2_id = t1_id
query TT
explain SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE
t2_id = t1_id limit 0)
----
-logical_plan
-01)LeftSemi Join: t1.t1_id = __correlated_sq_1.t2_id
-02)--TableScan: t1 projection=[t1_id, t1_name]
-03)--EmptyRelation
+logical_plan EmptyRelation
Review Comment:
> causing the entire join to be optimized out with the additional join types
now propagating emptyrelation.
I agree with your assesment. I think the `limit 0` means this plan is
better
In terms of preserving the test of the subquery rewrite I think it does
(otherwise it wouldn't be rewritten to a SEMI JOIN and thus the join couldn't
be removed)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]