This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push: new eb6d63fb93 Optimize MIN/MAX when relation is empty (#8940) eb6d63fb93 is described below commit eb6d63fb939b0ea01b6404ca5e44e50ee83e2dbc Author: Liang-Chi Hsieh <vii...@gmail.com> AuthorDate: Thu Jan 25 10:34:44 2024 -0800 Optimize MIN/MAX when relation is empty (#8940) * Optimize MIN/MAX when relation is empty * Fix clippy --- .../src/physical_optimizer/aggregate_statistics.rs | 98 ++++++++++++++-------- datafusion/sqllogictest/test_files/aggregate.slt | 20 +++++ 2 files changed, 84 insertions(+), 34 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs index 0a53c775aa..4fe11c14a7 100644 --- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs @@ -198,30 +198,45 @@ fn take_optimizable_min( stats: &Statistics, ) -> Option<(ScalarValue, String)> { if let Precision::Exact(num_rows) = &stats.num_rows { - if *num_rows > 0 { - let col_stats = &stats.column_statistics; - if let Some(casted_expr) = - agg_expr.as_any().downcast_ref::<expressions::Min>() - { - if casted_expr.expressions().len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = casted_expr.expressions()[0] - .as_any() - .downcast_ref::<expressions::Column>() + match *num_rows { + 0 => { + // MIN/MAX with 0 rows is always null + if let Some(casted_expr) = + agg_expr.as_any().downcast_ref::<expressions::Min>() + { + if let Ok(min_data_type) = + ScalarValue::try_from(casted_expr.field().unwrap().data_type()) { - if let Precision::Exact(val) = - &col_stats[col_expr.index()].min_value + return Some((min_data_type, casted_expr.name().to_string())); + } + } + } + value if value > 0 => { + let col_stats = &stats.column_statistics; + if let Some(casted_expr) = + agg_expr.as_any().downcast_ref::<expressions::Min>() + { + if casted_expr.expressions().len() == 1 { + // TODO optimize with exprs other than Column + if let Some(col_expr) = casted_expr.expressions()[0] + .as_any() + .downcast_ref::<expressions::Column>() { - if !val.is_null() { - return Some(( - val.clone(), - casted_expr.name().to_string(), - )); + if let Precision::Exact(val) = + &col_stats[col_expr.index()].min_value + { + if !val.is_null() { + return Some(( + val.clone(), + casted_expr.name().to_string(), + )); + } } } } } } + _ => {} } } None @@ -233,30 +248,45 @@ fn take_optimizable_max( stats: &Statistics, ) -> Option<(ScalarValue, String)> { if let Precision::Exact(num_rows) = &stats.num_rows { - if *num_rows > 0 { - let col_stats = &stats.column_statistics; - if let Some(casted_expr) = - agg_expr.as_any().downcast_ref::<expressions::Max>() - { - if casted_expr.expressions().len() == 1 { - // TODO optimize with exprs other than Column - if let Some(col_expr) = casted_expr.expressions()[0] - .as_any() - .downcast_ref::<expressions::Column>() + match *num_rows { + 0 => { + // MIN/MAX with 0 rows is always null + if let Some(casted_expr) = + agg_expr.as_any().downcast_ref::<expressions::Max>() + { + if let Ok(max_data_type) = + ScalarValue::try_from(casted_expr.field().unwrap().data_type()) { - if let Precision::Exact(val) = - &col_stats[col_expr.index()].max_value + return Some((max_data_type, casted_expr.name().to_string())); + } + } + } + value if value > 0 => { + let col_stats = &stats.column_statistics; + if let Some(casted_expr) = + agg_expr.as_any().downcast_ref::<expressions::Max>() + { + if casted_expr.expressions().len() == 1 { + // TODO optimize with exprs other than Column + if let Some(col_expr) = casted_expr.expressions()[0] + .as_any() + .downcast_ref::<expressions::Column>() { - if !val.is_null() { - return Some(( - val.clone(), - casted_expr.name().to_string(), - )); + if let Precision::Exact(val) = + &col_stats[col_expr.index()].max_value + { + if !val.is_null() { + return Some(( + val.clone(), + casted_expr.name().to_string(), + )); + } } } } } } + _ => {} } } None diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index e9c92f53e0..5cd728c434 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -3084,6 +3084,26 @@ SELECT MAX(col0) FROM empty WHERE col0=1; ---- NULL +query TT +EXPLAIN SELECT MIN(col0) FROM empty; +---- +logical_plan +Aggregate: groupBy=[[]], aggr=[[MIN(empty.col0)]] +--TableScan: empty projection=[col0] +physical_plan +ProjectionExec: expr=[NULL as MIN(empty.col0)] +--PlaceholderRowExec + +query TT +EXPLAIN SELECT MAX(col0) FROM empty; +---- +logical_plan +Aggregate: groupBy=[[]], aggr=[[MAX(empty.col0)]] +--TableScan: empty projection=[col0] +physical_plan +ProjectionExec: expr=[NULL as MAX(empty.col0)] +--PlaceholderRowExec + statement ok DROP TABLE empty;