This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new eb6d63fb93 Optimize MIN/MAX when relation is empty (#8940)
eb6d63fb93 is described below

commit eb6d63fb939b0ea01b6404ca5e44e50ee83e2dbc
Author: Liang-Chi Hsieh <vii...@gmail.com>
AuthorDate: Thu Jan 25 10:34:44 2024 -0800

    Optimize MIN/MAX when relation is empty (#8940)
    
    * Optimize MIN/MAX when relation is empty
    
    * Fix clippy
---
 .../src/physical_optimizer/aggregate_statistics.rs | 98 ++++++++++++++--------
 datafusion/sqllogictest/test_files/aggregate.slt   | 20 +++++
 2 files changed, 84 insertions(+), 34 deletions(-)

diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs
index 0a53c775aa..4fe11c14a7 100644
--- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs
+++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs
@@ -198,30 +198,45 @@ fn take_optimizable_min(
     stats: &Statistics,
 ) -> Option<(ScalarValue, String)> {
     if let Precision::Exact(num_rows) = &stats.num_rows {
-        if *num_rows > 0 {
-            let col_stats = &stats.column_statistics;
-            if let Some(casted_expr) =
-                agg_expr.as_any().downcast_ref::<expressions::Min>()
-            {
-                if casted_expr.expressions().len() == 1 {
-                    // TODO optimize with exprs other than Column
-                    if let Some(col_expr) = casted_expr.expressions()[0]
-                        .as_any()
-                        .downcast_ref::<expressions::Column>()
+        match *num_rows {
+            0 => {
+                // MIN/MAX with 0 rows is always null
+                if let Some(casted_expr) =
+                    agg_expr.as_any().downcast_ref::<expressions::Min>()
+                {
+                    if let Ok(min_data_type) =
+                        ScalarValue::try_from(casted_expr.field().unwrap().data_type())
                     {
-                        if let Precision::Exact(val) =
-                            &col_stats[col_expr.index()].min_value
+                        return Some((min_data_type, casted_expr.name().to_string()));
+                    }
+                }
+            }
+            value if value > 0 => {
+                let col_stats = &stats.column_statistics;
+                if let Some(casted_expr) =
+                    agg_expr.as_any().downcast_ref::<expressions::Min>()
+                {
+                    if casted_expr.expressions().len() == 1 {
+                        // TODO optimize with exprs other than Column
+                        if let Some(col_expr) = casted_expr.expressions()[0]
+                            .as_any()
+                            .downcast_ref::<expressions::Column>()
                         {
-                            if !val.is_null() {
-                                return Some((
-                                    val.clone(),
-                                    casted_expr.name().to_string(),
-                                ));
+                            if let Precision::Exact(val) =
+                                &col_stats[col_expr.index()].min_value
+                            {
+                                if !val.is_null() {
+                                    return Some((
+                                        val.clone(),
+                                        casted_expr.name().to_string(),
+                                    ));
+                                }
                             }
                         }
                     }
                 }
             }
+            _ => {}
         }
     }
     None
@@ -233,30 +248,45 @@ fn take_optimizable_max(
     stats: &Statistics,
 ) -> Option<(ScalarValue, String)> {
     if let Precision::Exact(num_rows) = &stats.num_rows {
-        if *num_rows > 0 {
-            let col_stats = &stats.column_statistics;
-            if let Some(casted_expr) =
-                agg_expr.as_any().downcast_ref::<expressions::Max>()
-            {
-                if casted_expr.expressions().len() == 1 {
-                    // TODO optimize with exprs other than Column
-                    if let Some(col_expr) = casted_expr.expressions()[0]
-                        .as_any()
-                        .downcast_ref::<expressions::Column>()
+        match *num_rows {
+            0 => {
+                // MIN/MAX with 0 rows is always null
+                if let Some(casted_expr) =
+                    agg_expr.as_any().downcast_ref::<expressions::Max>()
+                {
+                    if let Ok(max_data_type) =
+                        ScalarValue::try_from(casted_expr.field().unwrap().data_type())
                     {
-                        if let Precision::Exact(val) =
-                            &col_stats[col_expr.index()].max_value
+                        return Some((max_data_type, casted_expr.name().to_string()));
+                    }
+                }
+            }
+            value if value > 0 => {
+                let col_stats = &stats.column_statistics;
+                if let Some(casted_expr) =
+                    agg_expr.as_any().downcast_ref::<expressions::Max>()
+                {
+                    if casted_expr.expressions().len() == 1 {
+                        // TODO optimize with exprs other than Column
+                        if let Some(col_expr) = casted_expr.expressions()[0]
+                            .as_any()
+                            .downcast_ref::<expressions::Column>()
                         {
-                            if !val.is_null() {
-                                return Some((
-                                    val.clone(),
-                                    casted_expr.name().to_string(),
-                                ));
+                            if let Precision::Exact(val) =
+                                &col_stats[col_expr.index()].max_value
+                            {
+                                if !val.is_null() {
+                                    return Some((
+                                        val.clone(),
+                                        casted_expr.name().to_string(),
+                                    ));
+                                }
                             }
                         }
                     }
                 }
             }
+            _ => {}
         }
     }
     None
diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index e9c92f53e0..5cd728c434 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -3084,6 +3084,26 @@ SELECT MAX(col0) FROM empty WHERE col0=1;
 ----
 NULL
 
+query TT
+EXPLAIN SELECT MIN(col0) FROM empty;
+----
+logical_plan
+Aggregate: groupBy=[[]], aggr=[[MIN(empty.col0)]]
+--TableScan: empty projection=[col0]
+physical_plan
+ProjectionExec: expr=[NULL as MIN(empty.col0)]
+--PlaceholderRowExec
+
+query TT
+EXPLAIN SELECT MAX(col0) FROM empty;
+----
+logical_plan
+Aggregate: groupBy=[[]], aggr=[[MAX(empty.col0)]]
+--TableScan: empty projection=[col0]
+physical_plan
+ProjectionExec: expr=[NULL as MAX(empty.col0)]
+--PlaceholderRowExec
+
 statement ok
 DROP TABLE empty;
 

Reply via email to