alamb commented on code in PR #21668:
URL: https://github.com/apache/datafusion/pull/21668#discussion_r3262094507


##########
datafusion/expr/src/logical_plan/plan.rs:
##########
@@ -2495,6 +2495,15 @@ pub struct Filter {
 }
 
 impl Filter {
+    /// Create a new filter operator.
+    ///
+    /// Skips the type-checking and dealiasing done in [Self::try_new].
+    /// For internal use in DataFusion only.
+    #[doc(hidden)]

Review Comment:
   How about just call this `new` ? `unchecked` makes it sound like it should 
be `unsafe`, e.g. 
https://docs.rs/datafusion/latest/datafusion/common/arrow/array/struct.GenericByteArray.html#method.new_unchecked
   
   Though it does have the same properties that the the caller needs to ensure 
some invariants
   
   Could you please document what the properties are? I think `predicate` needs 
to only refer to columns that came from the output schema of `input`



##########
datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs:
##########
@@ -52,28 +52,27 @@ pub fn simplify_predicates(predicates: Vec<Expr>) -> 
Result<Vec<Expr>> {
     let mut other_predicates = Vec::new();
 
     for pred in predicates {
-        match &pred {
-            Expr::BinaryExpr(BinaryExpr {
-                left,
-                op:
-                    Operator::Gt
-                    | Operator::GtEq
-                    | Operator::Lt
-                    | Operator::LtEq
-                    | Operator::Eq,
-                right,
-            }) => {
-                let left_col = extract_column_from_expr(left);
-                let right_col = extract_column_from_expr(right);
-                if let (Some(col), Some(_)) = (&left_col, right.as_literal()) {
-                    
column_predicates.entry(col.clone()).or_default().push(pred);
-                } else if let (Some(_), Some(col)) = (left.as_literal(), 
&right_col) {
-                    
column_predicates.entry(col.clone()).or_default().push(pred);
-                } else {
-                    other_predicates.push(pred);
-                }
+        use Operator::*;

Review Comment:
   is this a drive by cleanup, or does it change the semantics?



##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -795,13 +802,15 @@ impl OptimizerRule for PushDownFilter {
                 new_predicates.len()
             );
         }
+
         if old_predicate_len != new_predicates.len() {
-            let Some(new_predicate) = conjunction(new_predicates) else {
+            if let Some(new_predicate) = conjunction(new_predicates) {

Review Comment:
   why this change? I don't think it is incorrect, but it makes the code harder 
to review as it increases churn and increases the size of the diff unecessairly
   
   This also applies to many of the other changes above.
   
   IN the future it would be easier (and thus faster) to review such PRs if you 
made a separate PR with code cleanups that don't change the semantics but just 
makes it better, and then one with your actual changes



##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -4177,7 +4179,7 @@ mod tests {
             plan,
             @r"
         Projection: a, b
-          Filter: t.a > Int32(5) AND t.b > Int32(10) AND TestScalarUDF() > 
Float64(0.1)
+          Filter: TestScalarUDF() > Float64(0.1) AND t.a > Int32(5) AND t.b > 
Int32(10)

Review Comment:
   why did this test change and why is it an improvement?



##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -1355,65 +1314,20 @@ fn rewrite_projection(
         }
     }
 
-    match conjunction(push_predicates) {
-        Some(expr) => {
-            // re-write all filters based on this projection
-            // E.g. in `Filter: b\n  Projection: a > 1 as b`, we can swap 
them, but the filter must be "a > 1"
-            let new_filter = LogicalPlan::Filter(Filter::try_new(
-                replace_cols_by_name(expr, &pushable_map)?,
-                std::mem::take(&mut projection.input),
-            )?);
-
-            projection.input = Arc::new(new_filter);
-
-            Ok((
-                Transformed::yes(LogicalPlan::Projection(projection)),
-                conjunction(keep_predicates),
-            ))
-        }
-        None => Ok((
-            Transformed::no(LogicalPlan::Projection(projection)),
-            conjunction(keep_predicates),
-        )),
-    }
-}
-
-/// Creates a new LogicalPlan::Filter node.
-pub fn make_filter(predicate: Expr, input: Arc<LogicalPlan>) -> 
Result<LogicalPlan> {

Review Comment:
   this is technically a breaking API I think -- how about you just deprecate 
it at first? 
https://datafusion.apache.org/contributor-guide/api-health.html#deprecation-guidelines



##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -1687,6 +1624,10 @@ mod tests {
             .aggregate(vec![col("a")], vec![sum(col("b")).alias("b")])?
             .filter(col("b").gt(lit(10i64)))?
             .build()?;
+        let transformed = PushDownFilter::new()

Review Comment:
   why did you add this? Maybe some comments would help
   
   Also rather than copy/pasting the same thing several places, maybe you can 
could add it to assert_optimized_plan_equal!?



##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -1441,42 +1355,65 @@ where
 
 /// replaces columns by its name on the projection.
 pub fn replace_cols_by_name(
-    e: Expr,
-    replace_map: &HashMap<String, Expr>,
+    expr: Expr,
+    replace_map: &HashMap<String, impl AsRef<Expr>>,
 ) -> Result<Expr> {
-    e.transform_up(|expr| {
-        Ok(if let Expr::Column(c) = &expr {
-            match replace_map.get(&c.flat_name()) {
-                Some(new_c) => Transformed::yes(new_c.clone()),
-                None => Transformed::no(expr),
-            }
+    expr.transform_up(|expr| {
+        if let Expr::Column(col) = &expr
+            && let Some(new_expr) = replace_map.get(&col.flat_name())
+        {
+            Ok(Transformed::yes(new_expr.as_ref().clone()))
         } else {
-            Transformed::no(expr)
-        })
+            Ok(Transformed::no(expr))
+        }
     })
     .data()
 }
 
+/// Unalias expression reference.
+fn unalias(expr: &Expr) -> &Expr {

Review Comment:
   Can we use 
https://docs.rs/datafusion/latest/datafusion/prelude/enum.Expr.html#method.unalias
 instead?



##########
datafusion-testing:
##########


Review Comment:
   Can we make this change as a separate PR to datafusion-testing first?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to