alamb commented on code in PR #13467: URL: https://github.com/apache/datafusion/pull/13467#discussion_r1847285822
########## datafusion/common/src/tree_node.rs: ########## @@ -769,6 +770,263 @@ impl<T> Transformed<T> { } } +/// [`Container`] contains elements that a function can be applied on or mapped. The +/// elements of the container are siblings so the continuation rules are similar to +/// [`TreeNodeRecursion::visit_sibling`] / [`Transformed::transform_sibling`]. +pub trait Container<'a, T: 'a>: Sized { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( Review Comment: Could we also add some documentation here about what apply_elements is (perhaps just a map back to `TreeNode::apply`?) ########## datafusion/common/src/tree_node.rs: ########## @@ -769,6 +770,263 @@ impl<T> Transformed<T> { } } +/// [`Container`] contains elements that a function can be applied on or mapped. The +/// elements of the container are siblings so the continuation rules are similar to +/// [`TreeNodeRecursion::visit_sibling`] / [`Transformed::transform_sibling`]. +pub trait Container<'a, T: 'a>: Sized { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + f: F, + ) -> Result<TreeNodeRecursion>; + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( Review Comment: Could we also add some documentation here about what map_elements is (perhaps just a map back to `TreeNode::map_children`?) ########## datafusion/common/src/tree_node.rs: ########## @@ -769,6 +770,263 @@ impl<T> Transformed<T> { } } +/// [`Container`] contains elements that a function can be applied on or mapped. The +/// elements of the container are siblings so the continuation rules are similar to +/// [`TreeNodeRecursion::visit_sibling`] / [`Transformed::transform_sibling`]. +pub trait Container<'a, T: 'a>: Sized { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + f: F, + ) -> Result<TreeNodeRecursion>; + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( + self, + f: F, + ) -> Result<Transformed<Self>>; +} + +impl<'a, T: 'a, C: Container<'a, T>> Container<'a, T> for Box<C> { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + f: F, + ) -> Result<TreeNodeRecursion> { + self.as_ref().apply_elements(f) + } + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( + self, + f: F, + ) -> Result<Transformed<Self>> { + (*self).map_elements(f)?.map_data(|c| Ok(Self::new(c))) + } +} + +impl<'a, T: 'a, C: Container<'a, T> + Clone> Container<'a, T> for Arc<C> { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + f: F, + ) -> Result<TreeNodeRecursion> { + self.as_ref().apply_elements(f) + } + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( + self, + f: F, + ) -> Result<Transformed<Self>> { + Arc::unwrap_or_clone(self) + .map_elements(f)? + .map_data(|c| Ok(Arc::new(c))) + } +} + +impl<'a, T: 'a, C: Container<'a, T>> Container<'a, T> for Option<C> { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + f: F, + ) -> Result<TreeNodeRecursion> { + match self { + Some(t) => t.apply_elements(f), + None => Ok(TreeNodeRecursion::Continue), + } + } + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( + self, + f: F, + ) -> Result<Transformed<Self>> { + self.map_or(Ok(Transformed::no(None)), |c| { + c.map_elements(f)?.map_data(|c| Ok(Some(c))) + }) + } +} + +impl<'a, T: 'a, C: Container<'a, T>> Container<'a, T> for Vec<C> { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + mut f: F, + ) -> Result<TreeNodeRecursion> { + let mut tnr = TreeNodeRecursion::Continue; + for c in self { + tnr = c.apply_elements(&mut f)?; + match tnr { + TreeNodeRecursion::Continue | TreeNodeRecursion::Jump => {} + TreeNodeRecursion::Stop => return Ok(TreeNodeRecursion::Stop), + } + } + Ok(tnr) + } + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( + self, + mut f: F, + ) -> Result<Transformed<Self>> { + let mut tnr = TreeNodeRecursion::Continue; + let mut transformed = false; + self.into_iter() + .map(|c| match tnr { + TreeNodeRecursion::Continue | TreeNodeRecursion::Jump => { + c.map_elements(&mut f).map(|result| { + tnr = result.tnr; + transformed |= result.transformed; + result.data + }) + } + TreeNodeRecursion::Stop => Ok(c), + }) + .collect::<Result<Vec<_>>>() + .map(|data| Transformed::new(data, transformed, tnr)) + } +} + +impl<'a, T: 'a, K: Eq + Hash, C: Container<'a, T>> Container<'a, T> for HashMap<K, C> { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + mut f: F, + ) -> Result<TreeNodeRecursion> { + let mut tnr = TreeNodeRecursion::Continue; + for c in self.values() { + tnr = c.apply_elements(&mut f)?; + match tnr { + TreeNodeRecursion::Continue | TreeNodeRecursion::Jump => {} + TreeNodeRecursion::Stop => return Ok(TreeNodeRecursion::Stop), + } + } + Ok(tnr) + } + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( + self, + mut f: F, + ) -> Result<Transformed<Self>> { + let mut tnr = TreeNodeRecursion::Continue; + let mut transformed = false; + self.into_iter() + .map(|(k, c)| match tnr { + TreeNodeRecursion::Continue | TreeNodeRecursion::Jump => { + c.map_elements(&mut f).map(|result| { + tnr = result.tnr; + transformed |= result.transformed; + (k, result.data) + }) + } + TreeNodeRecursion::Stop => Ok((k, c)), + }) + .collect::<Result<HashMap<_, _>>>() + .map(|data| Transformed::new(data, transformed, tnr)) + } +} + +impl<'a, T: 'a, C0: Container<'a, T>, C1: Container<'a, T>> Container<'a, T> Review Comment: It took me a moment to realize this was the impl for `(a,b)` 2-tuples. Maybe we could make that clearer with some comments. Likewise with the 3-tuple. Though to be honest, it seems to me like it might be clearer if we didn't implement this trait for tuples, and instead made the places they are used, named `struct`s. But that is just a personal style preference ########## datafusion/expr/src/logical_plan/tree_node.rs: ########## @@ -81,7 +79,7 @@ impl TreeNode for LogicalPlan { expr, input, schema, - }) => rewrite_arc(input, f)?.update_data(|input| { + }) => input.map_elements(f)?.update_data(|input| { Review Comment: Is the difference between `map_children` and `map_elements` that `map_elements` doesn't also apply the function to `input` ? ########## datafusion/expr/src/logical_plan/ddl.rs: ########## @@ -497,6 +522,29 @@ pub struct CreateFunctionBody { pub function_body: Option<Expr>, } +impl<'a> Container<'a, Expr> for CreateFunctionBody { + fn apply_elements<F: FnMut(&'a Expr) -> Result<TreeNodeRecursion>>( + &'a self, + f: F, + ) -> Result<TreeNodeRecursion> { + self.function_body.apply_elements(f) + } + + fn map_elements<F: FnMut(Expr) -> Result<Transformed<Expr>>>( Review Comment: this is very cool ########## datafusion/expr/src/logical_plan/tree_node.rs: ########## @@ -423,54 +405,40 @@ impl LogicalPlan { mut f: F, ) -> Result<TreeNodeRecursion> { match self { - LogicalPlan::Projection(Projection { expr, .. }) => { - expr.iter().apply_until_stop(f) - } - LogicalPlan::Values(Values { values, .. }) => values - .iter() - .apply_until_stop(|value| value.iter().apply_until_stop(&mut f)), + LogicalPlan::Projection(Projection { expr, .. }) => expr.apply_elements(f), + LogicalPlan::Values(Values { values, .. }) => values.apply_elements(f), LogicalPlan::Filter(Filter { predicate, .. }) => f(predicate), LogicalPlan::Repartition(Repartition { partitioning_scheme, .. }) => match partitioning_scheme { Partitioning::Hash(expr, _) | Partitioning::DistributeBy(expr) => { - expr.iter().apply_until_stop(f) + expr.apply_elements(f) } Partitioning::RoundRobinBatch(_) => Ok(TreeNodeRecursion::Continue), }, LogicalPlan::Window(Window { window_expr, .. }) => { - window_expr.iter().apply_until_stop(f) + window_expr.apply_elements(f) } LogicalPlan::Aggregate(Aggregate { group_expr, aggr_expr, .. - }) => group_expr - .iter() - .chain(aggr_expr.iter()) - .apply_until_stop(f), + }) => (group_expr, aggr_expr).apply_ref_elements(f), // There are two part of expression for join, equijoin(on) and non-equijoin(filter). // 1. the first part is `on.len()` equijoin expressions, and the struct of each expr is `left-on = right-on`. // 2. the second part is non-equijoin(filter). LogicalPlan::Join(Join { on, filter, .. }) => { - on.iter() - // TODO: why we need to create an `Expr::eq`? Cloning `Expr` is costly... Review Comment: 🎉 for removing the clone ########## datafusion/common/src/tree_node.rs: ########## @@ -769,6 +770,263 @@ impl<T> Transformed<T> { } } +/// [`Container`] contains elements that a function can be applied on or mapped. The +/// elements of the container are siblings so the continuation rules are similar to +/// [`TreeNodeRecursion::visit_sibling`] / [`Transformed::transform_sibling`]. +pub trait Container<'a, T: 'a>: Sized { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + f: F, + ) -> Result<TreeNodeRecursion>; + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( + self, + f: F, + ) -> Result<Transformed<Self>>; +} + +impl<'a, T: 'a, C: Container<'a, T>> Container<'a, T> for Box<C> { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + f: F, + ) -> Result<TreeNodeRecursion> { + self.as_ref().apply_elements(f) + } + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( + self, + f: F, + ) -> Result<Transformed<Self>> { + (*self).map_elements(f)?.map_data(|c| Ok(Self::new(c))) + } +} + +impl<'a, T: 'a, C: Container<'a, T> + Clone> Container<'a, T> for Arc<C> { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + f: F, + ) -> Result<TreeNodeRecursion> { + self.as_ref().apply_elements(f) + } + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( + self, + f: F, + ) -> Result<Transformed<Self>> { + Arc::unwrap_or_clone(self) + .map_elements(f)? + .map_data(|c| Ok(Arc::new(c))) + } +} + +impl<'a, T: 'a, C: Container<'a, T>> Container<'a, T> for Option<C> { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + f: F, + ) -> Result<TreeNodeRecursion> { + match self { + Some(t) => t.apply_elements(f), + None => Ok(TreeNodeRecursion::Continue), + } + } + + fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( + self, + f: F, + ) -> Result<Transformed<Self>> { + self.map_or(Ok(Transformed::no(None)), |c| { + c.map_elements(f)?.map_data(|c| Ok(Some(c))) + }) + } +} + +impl<'a, T: 'a, C: Container<'a, T>> Container<'a, T> for Vec<C> { + fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( + &'a self, + mut f: F, + ) -> Result<TreeNodeRecursion> { + let mut tnr = TreeNodeRecursion::Continue; + for c in self { + tnr = c.apply_elements(&mut f)?; + match tnr { Review Comment: this is definitely a much easier to understand formulation ########## datafusion/common/src/tree_node.rs: ########## @@ -769,6 +770,263 @@ impl<T> Transformed<T> { } } +/// [`Container`] contains elements that a function can be applied on or mapped. The +/// elements of the container are siblings so the continuation rules are similar to +/// [`TreeNodeRecursion::visit_sibling`] / [`Transformed::transform_sibling`]. +pub trait Container<'a, T: 'a>: Sized { Review Comment: I think it is a matter of preference, but what would you think about calling this something less generic, such as `TreeNodeContainer` ? ########## datafusion/expr/src/tree_node.rs: ########## @@ -57,78 +57,50 @@ impl TreeNode for Expr { | Expr::Negative(expr) | Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) - | Expr::InSubquery(InSubquery{ expr, .. }) => vec![expr.as_ref()], + | Expr::InSubquery(InSubquery { expr, .. }) => expr.apply_elements(f), Review Comment: it is nice to avoid creating vecs here as well -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org