alamb commented on code in PR #14474: URL: https://github.com/apache/datafusion/pull/14474#discussion_r1942586153
########## datafusion/sqllogictest/test_files/subquery.slt: ########## @@ -389,19 +389,25 @@ where o_orderstatus in ( 2 3 +# uncorrelated exists +query I +SELECT 1 WHERE EXISTS (SELECT 1) +---- +1 + #exists_subquery_with_same_table #Subquery and outer query refer to the same table. #It will not be rewritten to join because it is not a correlated subquery. query TT explain SELECT t1_id, t1_name, t1_int FROM t1 WHERE EXISTS(SELECT t1_int FROM t1 WHERE t1.t1_id > t1.t1_int) ---- logical_plan -01)Filter: EXISTS (<subquery>) -02)--Subquery: -03)----Projection: t1.t1_int -04)------Filter: t1.t1_int < t1.t1_id -05)--------TableScan: t1 -06)--TableScan: t1 projection=[t1_id, t1_name, t1_int] +01)LeftSemi Join: Review Comment: 👍 this plan looks good to me ########## datafusion/expr/src/logical_plan/invariants.rs: ########## @@ -272,26 +267,34 @@ fn check_inner_plan(inner_plan: &LogicalPlan, can_contain_outer_ref: bool) -> Re | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { - check_inner_plan(left, can_contain_outer_ref)?; - check_inner_plan(right, false) + check_inner_plan(left)?; + check_no_outer_references(right) } JoinType::Right | JoinType::RightSemi | JoinType::RightAnti => { - check_inner_plan(left, false)?; - check_inner_plan(right, can_contain_outer_ref) + check_no_outer_references(left)?; + check_inner_plan(right) } JoinType::Full => { inner_plan.apply_children(|plan| { - check_inner_plan(plan, false)?; + check_no_outer_references(plan)?; Ok(TreeNodeRecursion::Continue) })?; Ok(()) } }, LogicalPlan::Extension(_) => Ok(()), - _ => plan_err!( - "Unsupported operator in the subquery plan: {}", + plan => check_no_outer_references(plan), + } +} + +fn check_no_outer_references(inner_plan: &LogicalPlan) -> Result<()> { + if inner_plan.contains_outer_reference() { + plan_err!( + "Accessing outer reference columns is not allowed in the plan: {}", Review Comment: (although I see this check is simply moved around in this PR, not introduced) It would also be great if you were able to add a (negative) test for such queries ########## datafusion/optimizer/src/decorrelate_predicate_subquery.rs: ########## @@ -1844,6 +1864,69 @@ mod tests { assert_optimized_plan_equal(plan, expected) } + #[test] + fn exists_uncorrelated_unnest() -> Result<()> { + let subquery_table_source = table_source(&Schema::new(vec![Field::new( + "arr", + DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))), + true, + )])); + let subquery = LogicalPlanBuilder::scan_with_filters( + "sq", + subquery_table_source, + None, + vec![], + )? + .unnest_column("arr")? + .build()?; + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(table_scan) + .filter(exists(Arc::new(subquery)))? + .project(vec![col("test.b")])? + .build()?; + + let expected = "Projection: test.b [b:UInt32]\ + \n LeftSemi Join: Filter: Boolean(true) [a:UInt32, b:UInt32, c:UInt32]\ + \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [arr:Int32;N]\ + \n Unnest: lists[sq.arr|depth=1] structs[] [arr:Int32;N]\ + \n TableScan: sq [arr:List(Field { name: \"item\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N]"; + assert_optimized_plan_equal(plan, expected) + } + + #[test] + fn exists_correlated_unnest() -> Result<()> { + eprintln!("test start: exists_correlated_unnest"); Review Comment: not sure if you meant to leave this in or not ########## datafusion/expr/src/logical_plan/invariants.rs: ########## @@ -272,26 +267,34 @@ fn check_inner_plan(inner_plan: &LogicalPlan, can_contain_outer_ref: bool) -> Re | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { - check_inner_plan(left, can_contain_outer_ref)?; - check_inner_plan(right, false) + check_inner_plan(left)?; + check_no_outer_references(right) } JoinType::Right | JoinType::RightSemi | JoinType::RightAnti => { - check_inner_plan(left, false)?; - check_inner_plan(right, can_contain_outer_ref) + check_no_outer_references(left)?; + check_inner_plan(right) } JoinType::Full => { inner_plan.apply_children(|plan| { - check_inner_plan(plan, false)?; + check_no_outer_references(plan)?; Ok(TreeNodeRecursion::Continue) })?; Ok(()) } }, LogicalPlan::Extension(_) => Ok(()), - _ => plan_err!( - "Unsupported operator in the subquery plan: {}", + plan => check_no_outer_references(plan), + } +} + +fn check_no_outer_references(inner_plan: &LogicalPlan) -> Result<()> { + if inner_plan.contains_outer_reference() { + plan_err!( + "Accessing outer reference columns is not allowed in the plan: {}", Review Comment: I thought it was possible (in theory) to have a correlated subuery in Exists (which would access outder columns) So in this case, the error might be more clear as a `not_impl_err!` with "Referencing outer columns (correlated queries) not yet supported: {}" or something -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org