This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 294acab3f91 [improvement](statistics)Improve statistics cache loading 
logic. (#38829)
294acab3f91 is described below

commit 294acab3f9136002553e182f520a61dea772df7a
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Tue Aug 6 10:22:32 2024 +0800

    [improvement](statistics)Improve statistics cache loading logic. (#38829)
    
    Improve statistics cache loading logic. When a loading operation throws an
    exception, do not cache UNKNOWN, so that the load can be retried next time.
---
 .../doris/datasource/hive/HMSExternalTable.java    |  12 +-
 .../java/org/apache/doris/qe/ShowExecutor.java     |   9 +-
 .../apache/doris/statistics/AnalysisManager.java   |   8 +-
 .../apache/doris/statistics/ColumnStatistic.java   | 140 +++++++++------------
 .../statistics/ColumnStatisticsCacheLoader.java    |  34 ++---
 .../doris/statistics/PartitionColumnStatistic.java | 121 ++++++++----------
 .../PartitionColumnStatisticCacheLoader.java       |  28 ++---
 .../doris/statistics/StatisticsRepository.java     |  13 +-
 .../doris/statistics/util/StatisticsUtil.java      |   6 +-
 .../org/apache/doris/statistics/CacheTest.java     |  44 +++++++
 10 files changed, 205 insertions(+), 210 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 067f2231a4b..5692f61df0c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -610,21 +610,13 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
                 continue;
             }
             ColumnStatisticsData data = tableStat.getStatsData();
-            try {
-                setStatData(column, data, columnStatisticBuilder, count);
-            } catch (AnalysisException e) {
-                if (LOG.isDebugEnabled()) {
-                    LOG.debug(e);
-                }
-                return Optional.empty();
-            }
+            setStatData(column, data, columnStatisticBuilder, count);
         }
 
         return Optional.of(columnStatisticBuilder.build());
     }
 
-    private void setStatData(Column col, ColumnStatisticsData data, 
ColumnStatisticBuilder builder, long count)
-            throws AnalysisException {
+    private void setStatData(Column col, ColumnStatisticsData data, 
ColumnStatisticBuilder builder, long count) {
         long ndv = 0;
         long nulls = 0;
         double colSize = 0;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index 7340f2affcc..a3d1ca313ae 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -2695,7 +2695,14 @@ public class ShowExecutor {
             if (indexName == null) {
                 continue;
             }
-            columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), 
ColumnStatistic.fromResultRow(row)));
+            try {
+                columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), 
ColumnStatistic.fromResultRow(row)));
+            } catch (Exception e) {
+                LOG.warn("Failed to deserialize column statistics. reason: 
[{}]. Row [{}]", e.getMessage(), row);
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug(e);
+                }
+            }
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 0ebbfd79811..0f9e833d496 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -834,8 +834,12 @@ public class AnalysisManager implements Writable {
         //        count, ndv, null_count, min, max, data_size, update_time]
         StatisticsCache cache = Env.getCurrentEnv().getStatisticsCache();
         for (ResultRow row : resultRows) {
-            cache.updatePartitionColStatsCache(catalogId, dbId, tableId, 
indexId, row.get(4), colName,
-                    PartitionColumnStatistic.fromResultRow(row));
+            try {
+                cache.updatePartitionColStatsCache(catalogId, dbId, tableId, 
indexId, row.get(4), colName,
+                        PartitionColumnStatistic.fromResultRow(row));
+            } catch (Exception e) {
+                cache.invalidatePartitionColumnStatsCache(catalogId, dbId, 
tableId, indexId, row.get(4), colName);
+            }
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index 639d5ef8346..9713d2d30e1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -108,100 +108,82 @@ public class ColumnStatistic {
     }
 
     public static ColumnStatistic fromResultRow(List<ResultRow> resultRows) {
-        ColumnStatistic columnStatistic = null;
-        try {
-            for (ResultRow resultRow : resultRows) {
-                String partId = resultRow.get(6);
-                if (partId == null) {
-                    columnStatistic = fromResultRow(resultRow);
-                } else {
-                    LOG.warn("Column statistics table shouldn't contain 
partition stats. [{}]", resultRow);
-                }
-            }
-        } catch (Throwable t) {
-            if (LOG.isDebugEnabled()) {
-                LOG.debug("Failed to deserialize column stats", t);
+        ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN;
+        for (ResultRow resultRow : resultRows) {
+            String partId = resultRow.get(6);
+            if (partId == null) {
+                columnStatistic = fromResultRow(resultRow);
+            } else {
+                LOG.warn("Column statistics table shouldn't contain partition 
stats. [{}]", resultRow);
             }
-            return ColumnStatistic.UNKNOWN;
-        }
-        if (columnStatistic == null) {
-            return ColumnStatistic.UNKNOWN;
         }
         return columnStatistic;
     }
 
     // TODO: use thrift
     public static ColumnStatistic fromResultRow(ResultRow row) {
-        try {
-            ColumnStatisticBuilder columnStatisticBuilder = new 
ColumnStatisticBuilder();
-            double count = Double.parseDouble(row.get(7));
-            columnStatisticBuilder.setCount(count);
-            double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
-            columnStatisticBuilder.setNdv(ndv);
-            String nullCount = row.getWithDefault(9, "0");
-            columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
-            columnStatisticBuilder.setDataSize(Double
-                    .parseDouble(row.getWithDefault(12, "0")));
-            
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
-                    ? 0 : columnStatisticBuilder.getDataSize()
-                    / columnStatisticBuilder.getCount());
-            long catalogId = Long.parseLong(row.get(1));
-            long idxId = Long.parseLong(row.get(4));
-            long dbID = Long.parseLong(row.get(2));
-            long tblId = Long.parseLong(row.get(3));
-            String colName = row.get(5);
-            Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, 
idxId, colName);
-            if (col == null) {
-                if (LOG.isDebugEnabled()) {
-                    LOG.debug("Failed to deserialize column statistics, ctlId: 
{} dbId: {}"
-                                    + "tblId: {} column: {} not exists",
-                            catalogId, dbID, tblId, colName);
-                }
-                return ColumnStatistic.UNKNOWN;
+        ColumnStatisticBuilder columnStatisticBuilder = new 
ColumnStatisticBuilder();
+        double count = Double.parseDouble(row.get(7));
+        columnStatisticBuilder.setCount(count);
+        double ndv = Double.parseDouble(row.getWithDefault(8, "0"));
+        columnStatisticBuilder.setNdv(ndv);
+        String nullCount = row.getWithDefault(9, "0");
+        columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
+        columnStatisticBuilder.setDataSize(Double
+                .parseDouble(row.getWithDefault(12, "0")));
+        
columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0
+                ? 0 : columnStatisticBuilder.getDataSize()
+                / columnStatisticBuilder.getCount());
+        long catalogId = Long.parseLong(row.get(1));
+        long idxId = Long.parseLong(row.get(4));
+        long dbID = Long.parseLong(row.get(2));
+        long tblId = Long.parseLong(row.get(3));
+        String colName = row.get(5);
+        Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, 
colName);
+        if (col == null) {
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("Failed to deserialize column statistics, ctlId: {} 
dbId: {}"
+                                + "tblId: {} column: {} not exists",
+                        catalogId, dbID, tblId, colName);
             }
-            String min = row.get(10);
-            String max = row.get(11);
-            if (min != null && !min.equalsIgnoreCase("NULL")) {
-                // Internal catalog get the min/max value using a separate SQL,
-                // and the value is already encoded by base64. Need to handle 
internal and external catalog separately.
-                if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && 
min.equalsIgnoreCase("NULL")) {
+            return ColumnStatistic.UNKNOWN;
+        }
+        String min = row.get(10);
+        String max = row.get(11);
+        if (min != null && !min.equalsIgnoreCase("NULL")) {
+            // Internal catalog get the min/max value using a separate SQL,
+            // and the value is already encoded by base64. Need to handle 
internal and external catalog separately.
+            if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && 
min.equalsIgnoreCase("NULL")) {
+                columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+            } else {
+                try {
+                    
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
 min));
+                    
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), 
min));
+                } catch (AnalysisException e) {
+                    LOG.warn("Failed to deserialize column {} min value {}.", 
col, min, e);
                     
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
-                } else {
-                    try {
-                        
columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
 min));
-                        
columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), 
min));
-                    } catch (AnalysisException e) {
-                        LOG.warn("Failed to deserialize column {} min value 
{}.", col, min, e);
-                        
columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
-                    }
                 }
-            } else {
-                columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
             }
-            if (max != null && !max.equalsIgnoreCase("NULL")) {
-                if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && 
max.equalsIgnoreCase("NULL")) {
+        } else {
+            columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+        }
+        if (max != null && !max.equalsIgnoreCase("NULL")) {
+            if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && 
max.equalsIgnoreCase("NULL")) {
+                columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
+            } else {
+                try {
+                    
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
 max));
+                    
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), 
max));
+                } catch (AnalysisException e) {
+                    LOG.warn("Failed to deserialize column {} max value {}.", 
col, max, e);
                     
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
-                } else {
-                    try {
-                        
columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
 max));
-                        
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), 
max));
-                    } catch (AnalysisException e) {
-                        LOG.warn("Failed to deserialize column {} max value 
{}.", col, max, e);
-                        
columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
-                    }
                 }
-            } else {
-                columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
-            }
-            columnStatisticBuilder.setUpdatedTime(row.get(13));
-            return columnStatisticBuilder.build();
-        } catch (Exception e) {
-            LOG.warn("Failed to deserialize column statistics. reason: [{}]. 
Row [{}]", e.getMessage(), row);
-            if (LOG.isDebugEnabled()) {
-                LOG.debug(e);
             }
-            return ColumnStatistic.UNKNOWN;
+        } else {
+            columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
         }
+        columnStatisticBuilder.setUpdatedTime(row.get(13));
+        return columnStatisticBuilder.build();
     }
 
     public static boolean isAlmostUnique(double ndv, double rowCount) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
index fba1a4d7b72..ec98ee5af15 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
@@ -18,7 +18,6 @@
 package org.apache.doris.statistics;
 
 import org.apache.doris.catalog.TableIf;
-import org.apache.doris.qe.InternalQueryExecutionException;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import org.apache.logging.log4j.LogManager;
@@ -33,21 +32,14 @@ public class ColumnStatisticsCacheLoader extends 
BasicAsyncCacheLoader<Statistic
 
     @Override
     protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) {
-        Optional<ColumnStatistic> columnStatistic = Optional.empty();
+        Optional<ColumnStatistic> columnStatistic;
         try {
             // Load from statistics table.
             columnStatistic = loadFromStatsTable(key);
             if (!columnStatistic.isPresent()) {
                 // Load from data source metadata
-                try {
-                    TableIf table = StatisticsUtil.findTable(key.catalogId, 
key.dbId, key.tableId);
-                    columnStatistic = table.getColumnStatistic(key.colName);
-                } catch (Exception e) {
-                    if (LOG.isDebugEnabled()) {
-                        LOG.debug("Exception to get column statistics by 
metadata. [Catalog:{}, DB:{}, Table:{}]",
-                                key.catalogId, key.dbId, key.tableId, e);
-                    }
-                }
+                TableIf table = StatisticsUtil.findTable(key.catalogId, 
key.dbId, key.tableId);
+                columnStatistic = table.getColumnStatistic(key.colName);
             }
         } catch (Throwable t) {
             LOG.warn("Failed to load stats for column [Catalog:{}, DB:{}, 
Table:{}, Column:{}], Reason: {}",
@@ -55,6 +47,7 @@ public class ColumnStatisticsCacheLoader extends 
BasicAsyncCacheLoader<Statistic
             if (LOG.isDebugEnabled()) {
                 LOG.debug(t);
             }
+            return null;
         }
         if (columnStatistic.isPresent()) {
             // For non-empty table, return UNKNOWN if we can't collect ndv 
value.
@@ -68,22 +61,9 @@ public class ColumnStatisticsCacheLoader extends 
BasicAsyncCacheLoader<Statistic
     }
 
     private Optional<ColumnStatistic> loadFromStatsTable(StatisticsCacheKey 
key) {
-        List<ResultRow> columnResults;
-        try {
-            columnResults = StatisticsRepository.loadColStats(
-                    key.catalogId, key.dbId, key.tableId, key.idxId, 
key.colName);
-        } catch (InternalQueryExecutionException e) {
-            LOG.info("Failed to load stats for table {} column {}. Reason:{}",
-                    key.tableId, key.colName, e.getMessage());
-            return Optional.empty();
-        }
-        ColumnStatistic columnStatistics;
-        try {
-            columnStatistics = 
StatisticsUtil.deserializeToColumnStatistics(columnResults);
-        } catch (Exception e) {
-            LOG.warn("Exception to deserialize column statistics", e);
-            return Optional.empty();
-        }
+        List<ResultRow> columnResults
+                = StatisticsRepository.loadColStats(key.catalogId, key.dbId, 
key.tableId, key.idxId, key.colName);
+        ColumnStatistic columnStatistics = 
StatisticsUtil.deserializeToColumnStatistics(columnResults);
         if (columnStatistics == null) {
             return Optional.empty();
         } else {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
index d755392e79f..eebe910d8b0 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java
@@ -29,6 +29,7 @@ import org.apache.logging.log4j.Logger;
 
 import java.io.ByteArrayInputStream;
 import java.io.DataInputStream;
+import java.io.IOException;
 import java.util.Base64;
 import java.util.List;
 import java.util.StringJoiner;
@@ -76,7 +77,7 @@ public class PartitionColumnStatistic {
         this.updatedTime = updatedTime;
     }
 
-    public static PartitionColumnStatistic fromResultRow(List<ResultRow> 
resultRows) {
+    public static PartitionColumnStatistic fromResultRow(List<ResultRow> 
resultRows) throws IOException {
         if (resultRows == null || resultRows.isEmpty()) {
             return PartitionColumnStatistic.UNKNOWN;
         }
@@ -90,80 +91,68 @@ public class PartitionColumnStatistic {
                     stringJoiner.toString());
             return PartitionColumnStatistic.UNKNOWN;
         }
-        try {
-            return fromResultRow(resultRows.get(0));
-        } catch (Throwable t) {
-            if (LOG.isDebugEnabled()) {
-                LOG.debug("Failed to deserialize column stats", t);
-            }
-            return PartitionColumnStatistic.UNKNOWN;
-        }
+        return fromResultRow(resultRows.get(0));
     }
 
-    public static PartitionColumnStatistic fromResultRow(ResultRow row) {
+    public static PartitionColumnStatistic fromResultRow(ResultRow row) throws 
IOException {
         // row : [catalog_id, db_id, tbl_id, idx_id, part_name, col_id,
         //        count, ndv, null_count, min, max, data_size, update_time]
-        try {
-            long catalogId = Long.parseLong(row.get(0));
-            long dbID = Long.parseLong(row.get(1));
-            long tblId = Long.parseLong(row.get(2));
-            long idxId = Long.parseLong(row.get(3));
-            String colName = row.get(5);
-            Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, 
idxId, colName);
-            if (col == null) {
-                LOG.info("Failed to deserialize column statistics, ctlId: {} 
dbId: {}, "
-                        + "tblId: {} column: {} not exists", catalogId, dbID, 
tblId, colName);
-                return PartitionColumnStatistic.UNKNOWN;
-            }
+        long catalogId = Long.parseLong(row.get(0));
+        long dbID = Long.parseLong(row.get(1));
+        long tblId = Long.parseLong(row.get(2));
+        long idxId = Long.parseLong(row.get(3));
+        String colName = row.get(5);
+        Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, 
colName);
+        if (col == null) {
+            LOG.info("Failed to deserialize column statistics, ctlId: {} dbId: 
{}, "
+                    + "tblId: {} column: {} not exists", catalogId, dbID, 
tblId, colName);
+            return PartitionColumnStatistic.UNKNOWN;
+        }
 
-            PartitionColumnStatisticBuilder partitionStatisticBuilder = new 
PartitionColumnStatisticBuilder();
-            double count = Double.parseDouble(row.get(6));
-            partitionStatisticBuilder.setCount(count);
-            String ndv = row.get(7);
-            Base64.Decoder decoder = Base64.getDecoder();
-            DataInputStream dis = new DataInputStream(new 
ByteArrayInputStream(decoder.decode(ndv)));
-            Hll hll = new Hll();
-            if (!hll.deserialize(dis)) {
-                LOG.warn("Failed to deserialize ndv. [{}]", row);
-                return PartitionColumnStatistic.UNKNOWN;
-            }
-            partitionStatisticBuilder.setNdv(Hll128.fromHll(hll));
-            String nullCount = row.getWithDefault(8, "0");
-            
partitionStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
-            partitionStatisticBuilder.setDataSize(Double
-                    .parseDouble(row.getWithDefault(11, "0")));
-            
partitionStatisticBuilder.setAvgSizeByte(partitionStatisticBuilder.getCount() 
== 0
-                    ? 0 : partitionStatisticBuilder.getDataSize()
-                    / partitionStatisticBuilder.getCount());
-            String min = row.get(9);
-            String max = row.get(10);
-            if (min != null && !"NULL".equalsIgnoreCase(min)) {
-                try {
-                    
partitionStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
 min));
-                    
partitionStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(),
 min));
-                } catch (AnalysisException e) {
-                    LOG.warn("Failed to deserialize column {} min value {}.", 
col, min, e);
-                    
partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
-                }
-            } else {
+        PartitionColumnStatisticBuilder partitionStatisticBuilder = new 
PartitionColumnStatisticBuilder();
+        double count = Double.parseDouble(row.get(6));
+        partitionStatisticBuilder.setCount(count);
+        String ndv = row.get(7);
+        Base64.Decoder decoder = Base64.getDecoder();
+        DataInputStream dis = new DataInputStream(new 
ByteArrayInputStream(decoder.decode(ndv)));
+        Hll hll = new Hll();
+        if (!hll.deserialize(dis)) {
+            LOG.warn("Failed to deserialize ndv. [{}]", row);
+            return PartitionColumnStatistic.UNKNOWN;
+        }
+        partitionStatisticBuilder.setNdv(Hll128.fromHll(hll));
+        String nullCount = row.getWithDefault(8, "0");
+        partitionStatisticBuilder.setNumNulls(Double.parseDouble(nullCount));
+        partitionStatisticBuilder.setDataSize(Double
+                .parseDouble(row.getWithDefault(11, "0")));
+        
partitionStatisticBuilder.setAvgSizeByte(partitionStatisticBuilder.getCount() 
== 0
+                ? 0 : partitionStatisticBuilder.getDataSize()
+                / partitionStatisticBuilder.getCount());
+        String min = row.get(9);
+        String max = row.get(10);
+        if (min != null && !"NULL".equalsIgnoreCase(min)) {
+            try {
+                
partitionStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(),
 min));
+                
partitionStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(),
 min));
+            } catch (AnalysisException e) {
+                LOG.warn("Failed to deserialize column {} min value {}.", col, 
min, e);
                 
partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
             }
-            if (max != null && !"NULL".equalsIgnoreCase(max)) {
-                try {
-                    
partitionStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
 max));
-                    
partitionStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(),
 max));
-                } catch (AnalysisException e) {
-                    LOG.warn("Failed to deserialize column {} max value {}.", 
col, max, e);
-                    
partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
-                }
-            } else {
+        } else {
+            partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY);
+        }
+        if (max != null && !"NULL".equalsIgnoreCase(max)) {
+            try {
+                
partitionStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(),
 max));
+                
partitionStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(),
 max));
+            } catch (AnalysisException e) {
+                LOG.warn("Failed to deserialize column {} max value {}.", col, 
max, e);
                 
partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
             }
-            partitionStatisticBuilder.setUpdatedTime(row.get(12));
-            return partitionStatisticBuilder.build();
-        } catch (Exception e) {
-            LOG.warn("Failed to deserialize column statistics. Row [{}]", row, 
e);
-            return PartitionColumnStatistic.UNKNOWN;
+        } else {
+            partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY);
         }
+        partitionStatisticBuilder.setUpdatedTime(row.get(12));
+        return partitionStatisticBuilder.build();
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
index c365f6b1a74..7154ad2e819 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
@@ -17,12 +17,12 @@
 
 package org.apache.doris.statistics;
 
-import org.apache.doris.qe.InternalQueryExecutionException;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.io.IOException;
 import java.util.List;
 import java.util.Optional;
 
@@ -33,7 +33,7 @@ public class PartitionColumnStatisticCacheLoader extends
 
     @Override
     protected Optional<PartitionColumnStatistic> 
doLoad(PartitionColumnStatisticCacheKey key) {
-        Optional<PartitionColumnStatistic> partitionStatistic = 
Optional.empty();
+        Optional<PartitionColumnStatistic> partitionStatistic;
         try {
             partitionStatistic = loadFromPartitionStatsTable(key);
         } catch (Throwable t) {
@@ -42,6 +42,7 @@ public class PartitionColumnStatisticCacheLoader extends
             if (LOG.isDebugEnabled()) {
                 LOG.debug(t);
             }
+            return null;
         }
         if (partitionStatistic.isPresent()) {
             // For non-empty table, return UNKNOWN if we can't collect ndv 
value.
@@ -54,24 +55,11 @@ public class PartitionColumnStatisticCacheLoader extends
         return partitionStatistic;
     }
 
-    private Optional<PartitionColumnStatistic> 
loadFromPartitionStatsTable(PartitionColumnStatisticCacheKey key) {
-        List<ResultRow> partitionResults;
-        try {
-            String partName = "'" + StatisticsUtil.escapeSQL(key.partId) + "'";
-            partitionResults = StatisticsRepository.loadPartitionColumnStats(
+    private Optional<PartitionColumnStatistic> 
loadFromPartitionStatsTable(PartitionColumnStatisticCacheKey key)
+            throws IOException {
+        String partName = "'" + StatisticsUtil.escapeSQL(key.partId) + "'";
+        List<ResultRow> partitionResults = 
StatisticsRepository.loadPartitionColumnStats(
                 key.catalogId, key.dbId, key.tableId, key.idxId, partName, 
key.colName);
-        } catch (InternalQueryExecutionException e) {
-            LOG.info("Failed to load stats for table {} column {}. Reason:{}",
-                    key.tableId, key.colName, e.getMessage());
-            return Optional.empty();
-        }
-        PartitionColumnStatistic partitionStatistic;
-        try {
-            partitionStatistic = 
StatisticsUtil.deserializeToPartitionStatistics(partitionResults);
-        } catch (Exception e) {
-            LOG.warn("Exception to deserialize partition statistics", e);
-            return Optional.empty();
-        }
-        return Optional.ofNullable(partitionStatistic);
+        return 
Optional.ofNullable(StatisticsUtil.deserializeToPartitionStatistics(partitionResults));
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index 6202d40f1d9..76fb22a60e4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -126,11 +126,20 @@ public class StatisticsRepository {
 
     public static ColumnStatistic queryColumnStatisticsByName(
             long ctlId, long dbId, long tableId, long indexId, String colName) 
{
+        ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN;
         ResultRow resultRow = queryColumnStatisticById(ctlId, dbId, tableId, 
indexId, colName);
         if (resultRow == null) {
-            return ColumnStatistic.UNKNOWN;
+            return columnStatistic;
         }
-        return ColumnStatistic.fromResultRow(resultRow);
+        try {
+            columnStatistic = ColumnStatistic.fromResultRow(resultRow);
+        } catch (Exception e) {
+            LOG.warn("Failed to deserialize column statistics. reason: [{}]. 
Row [{}]", e.getMessage(), resultRow);
+            if (LOG.isDebugEnabled()) {
+                LOG.debug(e);
+            }
+        }
+        return columnStatistic;
     }
 
     public static List<ResultRow> queryColumnStatisticsByPartitions(TableIf 
table, Set<String> columnNames,
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index 7937040d323..0f5c81b1cf0 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -180,8 +180,7 @@ public class StatisticsUtil {
         }
     }
 
-    public static ColumnStatistic 
deserializeToColumnStatistics(List<ResultRow> resultBatches)
-            throws Exception {
+    public static ColumnStatistic 
deserializeToColumnStatistics(List<ResultRow> resultBatches) {
         if (CollectionUtils.isEmpty(resultBatches)) {
             return null;
         }
@@ -192,7 +191,8 @@ public class StatisticsUtil {
         return 
resultBatches.stream().map(Histogram::fromResultRow).collect(Collectors.toList());
     }
 
-    public static PartitionColumnStatistic 
deserializeToPartitionStatistics(List<ResultRow> resultBatches) {
+    public static PartitionColumnStatistic 
deserializeToPartitionStatistics(List<ResultRow> resultBatches)
+            throws IOException {
         if (CollectionUtils.isEmpty(resultBatches)) {
             return null;
         }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
index 729291d5323..9c0b7fff33c 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java
@@ -388,4 +388,48 @@ public class CacheTest extends TestWithFeService {
         Thread.sleep(100);
         Assertions.assertEquals(1, 
columnStatisticsCache.synchronous().asMap().size());
     }
+
+    @Test
+    public void testLoadWithException() throws Exception {
+        new MockUp<ColumnStatisticsCacheLoader>() {
+            @Mock
+            protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) 
{
+                return null;
+            }
+        };
+        StatisticsCache statisticsCache = new StatisticsCache();
+        ColumnStatistic columnStatistic = 
statisticsCache.getColumnStatistics(1, 1, 1, -1, "col");
+        Thread.sleep(3000);
+        Assertions.assertTrue(columnStatistic.isUnKnown);
+
+        new MockUp<ColumnStatisticsCacheLoader>() {
+            @Mock
+            protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) 
{
+                return Optional.of(new ColumnStatistic(1, 2,
+                    null, 3, 4, 5, 6, 7,
+                    null, null, false,
+                        new Date().toString()));
+            }
+        };
+        columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, 
"col");
+        for (int i = 0; i < 60; i++) {
+            columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, 
"col");
+            if (columnStatistic != ColumnStatistic.UNKNOWN) {
+                break;
+            }
+            System.out.println("Not ready yet.");
+            Thread.sleep(1000);
+        }
+        if (columnStatistic != ColumnStatistic.UNKNOWN) {
+            Assertions.assertEquals(1, columnStatistic.count);
+            Assertions.assertEquals(2, columnStatistic.ndv);
+            Assertions.assertEquals(3, columnStatistic.avgSizeByte);
+            Assertions.assertEquals(4, columnStatistic.numNulls);
+            Assertions.assertEquals(5, columnStatistic.dataSize);
+            Assertions.assertEquals(6, columnStatistic.minValue);
+            Assertions.assertEquals(7, columnStatistic.maxValue);
+        } else {
+            Assertions.fail("Column stats is still unknown");
+        }
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to