buraksenn commented on code in PR #21265:
URL: https://github.com/apache/datafusion/pull/21265#discussion_r3038715442
##########
datafusion/optimizer/src/optimize_projections/mod.rs:
##########
@@ -2311,6 +2320,56 @@ mod tests {
)
}
+ #[test]
+ fn optimize_projections_left_mark_join_with_outer_join() -> Result<()> {
+ use datafusion_expr::utils::disjunction;
+ use datafusion_expr::{exists, out_ref_col};
+
+ let table_a = test_table_scan_with_name("a")?;
+ let table_b = test_table_scan_with_name("b")?;
+
+ let sq_a = Arc::new(
+ LogicalPlanBuilder::from(test_table_scan_with_name("sq_a")?)
+ .filter(col("sq_a.a").eq(out_ref_col(DataType::UInt32,
"a.a")))?
+ .project(vec![lit(1)])?
+ .build()?,
+ );
+
+ let sq_b = Arc::new(
+ LogicalPlanBuilder::from(test_table_scan_with_name("sq_b")?)
+ .filter(col("sq_b.b").eq(out_ref_col(DataType::UInt32,
"a.b")))?
+ .project(vec![lit(1)])?
+ .build()?,
+ );
+
+ let exists_a = exists(sq_a);
+ let exists_b = exists(sq_b);
+
+ let plan = LogicalPlanBuilder::from(table_a)
+ .filter(disjunction(vec![exists_a, exists_b]).unwrap())?
+ .join(table_b, JoinType::Left, (vec!["a"], vec!["a"]), None)?
+ .build()?;
+
+ let optimizer = Optimizer::new();
+ let config = OptimizerContext::new();
+ let result = optimizer.optimize(plan, &config, observe);
+ assert!(
+ result.is_ok(),
+ "Full optimizer should not fail with schema mismatch: {:?}",
Review Comment:
I've added a test for just make sure full optimizer runs successfully as a
regression test instead of this. Moreover, I've modified the other test to use
`assert_optimized_plan_equal` and check final plan.
##########
datafusion/optimizer/src/optimize_projections/mod.rs:
##########
@@ -2311,6 +2320,56 @@ mod tests {
)
}
+ #[test]
+ fn optimize_projections_left_mark_join_with_outer_join() -> Result<()> {
+ use datafusion_expr::utils::disjunction;
+ use datafusion_expr::{exists, out_ref_col};
+
+ let table_a = test_table_scan_with_name("a")?;
+ let table_b = test_table_scan_with_name("b")?;
+
+ let sq_a = Arc::new(
+ LogicalPlanBuilder::from(test_table_scan_with_name("sq_a")?)
+ .filter(col("sq_a.a").eq(out_ref_col(DataType::UInt32,
"a.a")))?
+ .project(vec![lit(1)])?
+ .build()?,
+ );
+
+ let sq_b = Arc::new(
+ LogicalPlanBuilder::from(test_table_scan_with_name("sq_b")?)
+ .filter(col("sq_b.b").eq(out_ref_col(DataType::UInt32,
"a.b")))?
+ .project(vec![lit(1)])?
+ .build()?,
+ );
+
+ let exists_a = exists(sq_a);
+ let exists_b = exists(sq_b);
+
+ let plan = LogicalPlanBuilder::from(table_a)
+ .filter(disjunction(vec![exists_a, exists_b]).unwrap())?
+ .join(table_b, JoinType::Left, (vec!["a"], vec!["a"]), None)?
+ .build()?;
+
+ let optimizer = Optimizer::new();
+ let config = OptimizerContext::new();
+ let result = optimizer.optimize(plan, &config, observe);
+ assert!(
+ result.is_ok(),
+ "Full optimizer should not fail with schema mismatch: {:?}",
+ result.err()
+ );
+ let optimized = result.unwrap();
+ let plan_str = format!("{optimized}");
+ // Verify no double projection — the projection we add to strip mark
+ // columns should be merged by optimize_projections, not left stacked.
+ assert!(
+ !plan_str.contains("Projection: a.a, a.b, a.c\n
Projection:"),
Review Comment:
> I've added a test for just make sure full optimizer runs successfully as a
regression test instead of this. Moreover, I've modified the other test to use
`assert_optimized_plan_equal` and check final plan.
replied above. Thanks it makes sense
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]