Jefffrey commented on code in PR #22967:
URL: https://github.com/apache/datafusion/pull/22967#discussion_r3418427514
##########
datafusion/sql/Cargo.toml:
##########
@@ -56,7 +56,6 @@ bigdecimal = { workspace = true }
chrono = { workspace = true }
datafusion-common = { workspace = true, features = ["sql"] }
datafusion-expr = { workspace = true, features = ["sql"] }
-datafusion-functions-nested = { workspace = true, features = ["sql"] }
Review Comment:
main benefit here
##########
datafusion/functions-nested/src/planner.rs:
##########
@@ -120,6 +124,119 @@ impl ExprPlanner for NestedFunctionPlanner {
ScalarFunction::new_udf(map_udf(), vec![keys, values]),
)))
}
+
+ fn plan_any(&self, args: RawAnyAllExpr) ->
Result<PlannerResult<RawAnyAllExpr>> {
Review Comment:
this implementation code is copied essentially verbatim from
`sql/src/expr/mod.rs`; the only modifications being some renaming of variables
(for `any` it called them left/right which are now aligned to needle/haystack)
and some plumbing to make it work with the signatures of `plan_any`/`plan_all`
##########
datafusion/sql/src/expr/mod.rs:
##########
@@ -615,9 +613,24 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
planner_context,
),
_ => {
- let left_expr = self.sql_to_expr(*left, schema,
planner_context)?;
- let right_expr = self.sql_to_expr(*right, schema,
planner_context)?;
- plan_any_op(left_expr, right_expr, &compare_op)
+ let needle = self.sql_to_expr(*left, schema,
planner_context)?;
+ let haystack = self.sql_to_expr(*right, schema,
planner_context)?;
Review Comment:
one part i'm not sure about was whether to include this subquery case as
part of this `plan_any`/`plan_all` call; maybe its better to just name these as
`plan_any_nested`/`plan_all_nested` to differentiate it from the subquery
planning, which we'll keep in `datafusion-sql`?
##########
datafusion/functions-nested/src/planner.rs:
##########
@@ -120,6 +124,119 @@ impl ExprPlanner for NestedFunctionPlanner {
ScalarFunction::new_udf(map_udf(), vec![keys, values]),
)))
}
+
+ fn plan_any(&self, args: RawAnyAllExpr) ->
Result<PlannerResult<RawAnyAllExpr>> {
+ let RawAnyAllExpr {
+ op,
+ needle,
+ haystack,
+ } = args;
+ let expr = match op {
+ BinaryOperator::Eq => Ok(array_has(haystack, needle)),
+ BinaryOperator::NotEq => {
+ let min = array_min(haystack.clone());
+ let max = array_max(haystack.clone());
+ // NOT EQ is true when either bound differs from left
+ let comparison =
+ min.not_eq(needle.clone()).or(max.clone().not_eq(needle));
+ any_op_with_null_handling(max, comparison, haystack)
+ }
+ BinaryOperator::Gt => {
+ let min = array_min(haystack.clone());
+ any_op_with_null_handling(min.clone(), min.lt(needle),
haystack)
+ }
+ BinaryOperator::Lt => {
+ let max = array_max(haystack.clone());
+ any_op_with_null_handling(max.clone(), max.gt(needle),
haystack)
+ }
+ BinaryOperator::GtEq => {
+ let min = array_min(haystack.clone());
+ any_op_with_null_handling(min.clone(), min.lt_eq(needle),
haystack)
+ }
+ BinaryOperator::LtEq => {
+ let max = array_max(haystack.clone());
+ any_op_with_null_handling(max.clone(), max.gt_eq(needle),
haystack)
+ }
+ _ => plan_err!(
+ "Unsupported AnyOp: '{op}', only '=', '<>', '>', '<', '>=',
'<=' are supported"
+ ),
+ }?;
+ Ok(PlannerResult::Planned(expr))
+ }
+
+ fn plan_all(&self, args: RawAnyAllExpr) ->
Result<PlannerResult<RawAnyAllExpr>> {
+ // CASE/WHEN structure:
+ // WHEN arr IS NULL → NULL
+ // WHEN empty → TRUE
+ // WHEN lhs IS NULL → NULL
+ // WHEN decisive_condition → FALSE
+ // WHEN has_nulls → NULL
+ // ELSE → TRUE
+ let RawAnyAllExpr {
+ op,
+ needle,
+ haystack,
+ } = args;
+ let null_arr_check = haystack.clone().is_null();
+ let empty_check = cardinality(haystack.clone()).eq(lit(0u64));
+ let null_lhs_check = needle.clone().is_null();
+ // DataFusion's array_position uses is_null() checks internally (not
equality),
+ // so it can locate NULL elements even though NULL = NULL is NULL in
standard SQL.
+ let has_nulls =
+ array_position(haystack.clone(), lit(ScalarValue::Null), lit(1i64))
+ .is_not_null();
+
+ let decisive_condition = match op {
Review Comment:
also moving this planning code into `datafusion-functions-nested` should
allow us to more easily create dedicated UDFs for any/all without needing to
expose these UDFs beyond the crate
- e.g. instead of needing to create new UDFs
`array_has_all_eq`/`array_has_all_gt` that must be public for the planning code
to reach it (from `sql/src/expr/mod.rs`) we can keep it private within this
crate
related:
- #22102
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]