Dandandan commented on code in PR #16445: URL: https://github.com/apache/datafusion/pull/16445#discussion_r2154953337
########## datafusion/physical-plan/src/joins/hash_join.rs: ########## @@ -943,10 +978,71 @@ impl ExecutionPlan for HashJoinExec { try_embed_projection(projection, self) } } + + fn gather_filters_for_pushdown( + &self, + phase: FilterPushdownPhase, + parent_filters: Vec<Arc<dyn PhysicalExpr>>, + config: &datafusion_common::config::ConfigOptions, + ) -> Result<FilterDescription> { + // Don't allow parent filters to be pushed down for now + // Only add our dynamic filter during the Post phase + if !matches!(phase, FilterPushdownPhase::Post) { + return Ok(FilterDescription::new_with_child_count(2) + .all_parent_filters_unsupported(parent_filters)); + } + + // Only push down dynamic filters if enabled + if config.optimizer.enable_dynamic_filter_pushdown { + let filter = Arc::clone(&self.dynamic_filter) as Arc<dyn PhysicalExpr>; + // Push the dynamic filter to the right side (probe side) only + // Left side (build side) gets empty vec, right side gets the filter + let filters_for_children = vec![vec![], vec![filter]]; + return Ok(FilterDescription::new_with_child_count(2) + .all_parent_filters_unsupported(parent_filters) + .with_self_filters_for_children(filters_for_children)); + } + + Ok(FilterDescription::new_with_child_count(2) + .all_parent_filters_unsupported(parent_filters)) + } +} + +/// Compute min/max bounds for each column in the given arrays +fn compute_bounds(arrays: &[ArrayRef]) -> Result<Vec<(ScalarValue, ScalarValue)>> { + arrays + .iter() + .map(|array| { + if array.is_empty() { + // Return NULL values for empty arrays + return Ok(( + ScalarValue::try_from(array.data_type())?, + ScalarValue::try_from(array.data_type())?, + )); + } + + // Compute min/max using ScalarValue's utilities + let mut min_val = ScalarValue::try_from_array(array, 0)?; Review Comment: I think we should use an Arrow kernel for this (this is slow). -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org