sgrebnov commented on code in PR #13132:
URL: https://github.com/apache/datafusion/pull/13132#discussion_r1819442675


##########
datafusion/sql/tests/cases/plan_to_sql.rs:
##########
@@ -1008,6 +1008,93 @@ fn test_sort_with_push_down_fetch() -> Result<()> {
     Ok(())
 }
 
+#[test]
+fn test_join_with_table_scan_filters() -> Result<()> {
+    let schema_left = Schema::new(vec![
+        Field::new("id", DataType::Utf8, false),
+        Field::new("name", DataType::Utf8, false),
+    ]);
+
+    let schema_right = Schema::new(vec![
+        Field::new("id", DataType::Utf8, false),
+        Field::new("age", DataType::Utf8, false),
+    ]);
+
+    let left_plan = table_scan_with_filters(
+        Some("left_table"),
+        &schema_left,
+        None,
+        vec![col("name").like(lit("some_name"))],
+    )?
+    .alias("left")?
+    .build()?;
+
+    let right_plan = table_scan_with_filters(
+        Some("right_table"),
+        &schema_right,
+        None,
+        vec![col("age").gt(lit(10))],
+    )?
+    .build()?;
+
+    let join_plan_with_filter = LogicalPlanBuilder::from(left_plan.clone())
+        .join(
+            right_plan.clone(),
+            datafusion_expr::JoinType::Inner,
+            (vec!["left.id"], vec!["right_table.id"]),
+            Some(col("left.id").gt(lit(5))),
+        )?
+        .build()?;
+
+    let sql = plan_to_sql(&join_plan_with_filter)?;
+
+    let expected_sql = r#"SELECT * FROM left_table AS "left" JOIN right_table 
ON "left".id = right_table.id AND (("left".id > 5) AND ("left"."name" LIKE 
'some_name' AND (age > 10)))"#;
+
+    assert_eq!(sql.to_string(), expected_sql);
+
+    let join_plan_no_filter = LogicalPlanBuilder::from(left_plan.clone())
+        .join(
+            right_plan,
+            datafusion_expr::JoinType::Inner,
+            (vec!["left.id"], vec!["right_table.id"]),
+            None,
+        )?
+        .build()?;
+
+    let sql = plan_to_sql(&join_plan_no_filter)?;
+
+    let expected_sql = r#"SELECT * FROM left_table AS "left" JOIN right_table 
ON "left".id = right_table.id AND ("left"."name" LIKE 'some_name' AND (age > 
10))"#;

Review Comment:
   @goldmedal - thank you for deep review. I suspect that filters were not 
fully pushed down for the right join and full join cases by DF during 
optimization for samples above as two test queries are not exactly the same as 
how records are filtered:
   `on o_custkey = c_custkey where c_name = 'Customer#000000001'  <-- filter is 
applied after join
   `on o_custkey = c_custkey and c_name = 'Customer#000000001'` <-- filtering 
is done during join, will join/include NULL
   
   It seems in all examples above the original `WHERE ` was moved inside Join 
by optimizer (all cases), so optimized plan should be unparsed as below
   
   `select o_orderkey from orders right join (select c_custkey  from customer 
where c_name = 'Customer#000000001') on o_custkey = c_custkey`
   
   or
   
   ```
   select o_orderkey from orders full join customers  on o_custkey = c_custkey 
and c_name = 'Customer#000000001'
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to