alamb commented on code in PR #16642: URL: https://github.com/apache/datafusion/pull/16642#discussion_r2180625134
########## datafusion/physical-plan/src/filter_pushdown.rs: ########## @@ -317,24 +152,74 @@ impl<T> FilterPushdownPropagation<T> { } #[derive(Debug, Clone)] -struct ChildFilterDescription { +pub struct ChildFilterDescription { /// Description of which parent filters can be pushed down into this node. /// Since we need to transmit filter pushdown results back to this node's parent /// we need to track each parent filter for each child, even those that are unsupported / won't be pushed down. /// We do this using a [`PredicateSupport`] which simplifies manipulating supported/unsupported filters. - parent_filters: PredicateSupports, + pub(crate) parent_filters: Vec<PredicateSupport>, /// Description of which filters this node is pushing down to its children. /// Since this is not transmitted back to the parents we can have variable sized inner arrays /// instead of having to track supported/unsupported. - self_filters: Vec<Arc<dyn PhysicalExpr>>, + pub(crate) self_filters: Vec<Arc<dyn PhysicalExpr>>, } impl ChildFilterDescription { - fn new() -> Self { - Self { - parent_filters: PredicateSupports::new(vec![]), - self_filters: vec![], + /// Build a child filter description by analyzing which parent filters can be pushed to a specific child. Review Comment: Update -- I see this is defined on `FilterDescription::from_children` maybe we can leave a link to it here ```suggestion /// Build a child filter description by analyzing which parent filters can be pushed to a specific child. /// /// See [`FilterDescription::from_children`] for more details ``` ########## datafusion/physical-plan/src/filter_pushdown.rs: ########## @@ -317,24 +152,74 @@ impl<T> FilterPushdownPropagation<T> { } #[derive(Debug, Clone)] -struct ChildFilterDescription { +pub struct ChildFilterDescription { /// Description of which parent filters can be pushed down into this node. /// Since we need to transmit filter pushdown results back to this node's parent /// we need to track each parent filter for each child, even those that are unsupported / won't be pushed down. /// We do this using a [`PredicateSupport`] which simplifies manipulating supported/unsupported filters. - parent_filters: PredicateSupports, + pub(crate) parent_filters: Vec<PredicateSupport>, /// Description of which filters this node is pushing down to its children. /// Since this is not transmitted back to the parents we can have variable sized inner arrays /// instead of having to track supported/unsupported. - self_filters: Vec<Arc<dyn PhysicalExpr>>, + pub(crate) self_filters: Vec<Arc<dyn PhysicalExpr>>, } impl ChildFilterDescription { - fn new() -> Self { - Self { - parent_filters: PredicateSupports::new(vec![]), - self_filters: vec![], + /// Build a child filter description by analyzing which parent filters can be pushed to a specific child. Review Comment: It would help to document here what conditions are being checkd for -- it seems that it is just checking that the child has all the columns necessary to evaluate the filter, but doesn't check if the child can semantically push them parent filters through ########## datafusion/physical-plan/src/sorts/sort.rs: ########## @@ -1268,19 +1270,19 @@ impl ExecutionPlan for SortExec { config: &datafusion_common::config::ConfigOptions, ) -> Result<FilterDescription> { if !matches!(phase, FilterPushdownPhase::Post) { - return Ok(FilterDescription::new_with_child_count(1) - .all_parent_filters_supported(parent_filters)); + return FilterDescription::from_children(parent_filters, &self.children()); } + + let mut child = ChildFilterDescription::from_child(parent_filters, self.input())?; + if let Some(filter) = &self.filter { if config.optimizer.enable_dynamic_filter_pushdown { - let filter = Arc::clone(filter) as Arc<dyn PhysicalExpr>; - return Ok(FilterDescription::new_with_child_count(1) - .all_parent_filters_supported(parent_filters) - .with_self_filter(filter)); + child = Review Comment: I agree this looks much better -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org