This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 294acab3f91 [improvement](statistics)Improve statistics cache loading logic. (#38829) 294acab3f91 is described below commit 294acab3f9136002553e182f520a61dea772df7a Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Tue Aug 6 10:22:32 2024 +0800 [improvement](statistics)Improve statistics cache loading logic. (#38829) Improve statistics cache loading logic. When a loading operation throws an exception, do not cache UNKNOWN, so it can try to load again next time. --- .../doris/datasource/hive/HMSExternalTable.java | 12 +- .../java/org/apache/doris/qe/ShowExecutor.java | 9 +- .../apache/doris/statistics/AnalysisManager.java | 8 +- .../apache/doris/statistics/ColumnStatistic.java | 140 +++++++++------------ .../statistics/ColumnStatisticsCacheLoader.java | 34 ++--- .../doris/statistics/PartitionColumnStatistic.java | 121 ++++++++---------- .../PartitionColumnStatisticCacheLoader.java | 28 ++--- .../doris/statistics/StatisticsRepository.java | 13 +- .../doris/statistics/util/StatisticsUtil.java | 6 +- .../org/apache/doris/statistics/CacheTest.java | 44 +++++++ 10 files changed, 205 insertions(+), 210 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 067f2231a4b..5692f61df0c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -610,21 +610,13 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI continue; } ColumnStatisticsData data = tableStat.getStatsData(); - try { - setStatData(column, data, columnStatisticBuilder, count); - } catch (AnalysisException e) { - if (LOG.isDebugEnabled()) { - LOG.debug(e); - } - return Optional.empty(); - } + setStatData(column, data, 
columnStatisticBuilder, count); } return Optional.of(columnStatisticBuilder.build()); } - private void setStatData(Column col, ColumnStatisticsData data, ColumnStatisticBuilder builder, long count) - throws AnalysisException { + private void setStatData(Column col, ColumnStatisticsData data, ColumnStatisticBuilder builder, long count) { long ndv = 0; long nulls = 0; double colSize = 0; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 7340f2affcc..a3d1ca313ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -2695,7 +2695,14 @@ public class ShowExecutor { if (indexName == null) { continue; } - columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row))); + try { + columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row))); + } catch (Exception e) { + LOG.warn("Failed to deserialize column statistics. reason: [{}]. 
Row [{}]", e.getMessage(), row); + if (LOG.isDebugEnabled()) { + LOG.debug(e); + } + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 0ebbfd79811..0f9e833d496 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -834,8 +834,12 @@ public class AnalysisManager implements Writable { // count, ndv, null_count, min, max, data_size, update_time] StatisticsCache cache = Env.getCurrentEnv().getStatisticsCache(); for (ResultRow row : resultRows) { - cache.updatePartitionColStatsCache(catalogId, dbId, tableId, indexId, row.get(4), colName, - PartitionColumnStatistic.fromResultRow(row)); + try { + cache.updatePartitionColStatsCache(catalogId, dbId, tableId, indexId, row.get(4), colName, + PartitionColumnStatistic.fromResultRow(row)); + } catch (Exception e) { + cache.invalidatePartitionColumnStatsCache(catalogId, dbId, tableId, indexId, row.get(4), colName); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 639d5ef8346..9713d2d30e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -108,100 +108,82 @@ public class ColumnStatistic { } public static ColumnStatistic fromResultRow(List<ResultRow> resultRows) { - ColumnStatistic columnStatistic = null; - try { - for (ResultRow resultRow : resultRows) { - String partId = resultRow.get(6); - if (partId == null) { - columnStatistic = fromResultRow(resultRow); - } else { - LOG.warn("Column statistics table shouldn't contain partition stats. 
[{}]", resultRow); - } - } - } catch (Throwable t) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to deserialize column stats", t); + ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN; + for (ResultRow resultRow : resultRows) { + String partId = resultRow.get(6); + if (partId == null) { + columnStatistic = fromResultRow(resultRow); + } else { + LOG.warn("Column statistics table shouldn't contain partition stats. [{}]", resultRow); } - return ColumnStatistic.UNKNOWN; - } - if (columnStatistic == null) { - return ColumnStatistic.UNKNOWN; } return columnStatistic; } // TODO: use thrift public static ColumnStatistic fromResultRow(ResultRow row) { - try { - ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(); - double count = Double.parseDouble(row.get(7)); - columnStatisticBuilder.setCount(count); - double ndv = Double.parseDouble(row.getWithDefault(8, "0")); - columnStatisticBuilder.setNdv(ndv); - String nullCount = row.getWithDefault(9, "0"); - columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount)); - columnStatisticBuilder.setDataSize(Double - .parseDouble(row.getWithDefault(12, "0"))); - columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0 - ? 
0 : columnStatisticBuilder.getDataSize() - / columnStatisticBuilder.getCount()); - long catalogId = Long.parseLong(row.get(1)); - long idxId = Long.parseLong(row.get(4)); - long dbID = Long.parseLong(row.get(2)); - long tblId = Long.parseLong(row.get(3)); - String colName = row.get(5); - Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName); - if (col == null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to deserialize column statistics, ctlId: {} dbId: {}" - + "tblId: {} column: {} not exists", - catalogId, dbID, tblId, colName); - } - return ColumnStatistic.UNKNOWN; + ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(); + double count = Double.parseDouble(row.get(7)); + columnStatisticBuilder.setCount(count); + double ndv = Double.parseDouble(row.getWithDefault(8, "0")); + columnStatisticBuilder.setNdv(ndv); + String nullCount = row.getWithDefault(9, "0"); + columnStatisticBuilder.setNumNulls(Double.parseDouble(nullCount)); + columnStatisticBuilder.setDataSize(Double + .parseDouble(row.getWithDefault(12, "0"))); + columnStatisticBuilder.setAvgSizeByte(columnStatisticBuilder.getCount() == 0 + ? 0 : columnStatisticBuilder.getDataSize() + / columnStatisticBuilder.getCount()); + long catalogId = Long.parseLong(row.get(1)); + long idxId = Long.parseLong(row.get(4)); + long dbID = Long.parseLong(row.get(2)); + long tblId = Long.parseLong(row.get(3)); + String colName = row.get(5); + Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName); + if (col == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to deserialize column statistics, ctlId: {} dbId: {}" + + "tblId: {} column: {} not exists", + catalogId, dbID, tblId, colName); } - String min = row.get(10); - String max = row.get(11); - if (min != null && !min.equalsIgnoreCase("NULL")) { - // Internal catalog get the min/max value using a separate SQL, - // and the value is already encoded by base64. 
Need to handle internal and external catalog separately. - if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && min.equalsIgnoreCase("NULL")) { + return ColumnStatistic.UNKNOWN; + } + String min = row.get(10); + String max = row.get(11); + if (min != null && !min.equalsIgnoreCase("NULL")) { + // Internal catalog get the min/max value using a separate SQL, + // and the value is already encoded by base64. Need to handle internal and external catalog separately. + if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && min.equalsIgnoreCase("NULL")) { + columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); + } else { + try { + columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min)); + columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min)); + } catch (AnalysisException e) { + LOG.warn("Failed to deserialize column {} min value {}.", col, min, e); columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); - } else { - try { - columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min)); - columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min)); - } catch (AnalysisException e) { - LOG.warn("Failed to deserialize column {} min value {}.", col, min, e); - columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); - } } - } else { - columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); } - if (max != null && !max.equalsIgnoreCase("NULL")) { - if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && max.equalsIgnoreCase("NULL")) { + } else { + columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); + } + if (max != null && !max.equalsIgnoreCase("NULL")) { + if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && max.equalsIgnoreCase("NULL")) { + columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); + } else { + try { + columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max)); + 
columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max)); + } catch (AnalysisException e) { + LOG.warn("Failed to deserialize column {} max value {}.", col, max, e); columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); - } else { - try { - columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max)); - columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max)); - } catch (AnalysisException e) { - LOG.warn("Failed to deserialize column {} max value {}.", col, max, e); - columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); - } } - } else { - columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); - } - columnStatisticBuilder.setUpdatedTime(row.get(13)); - return columnStatisticBuilder.build(); - } catch (Exception e) { - LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), row); - if (LOG.isDebugEnabled()) { - LOG.debug(e); } - return ColumnStatistic.UNKNOWN; + } else { + columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); } + columnStatisticBuilder.setUpdatedTime(row.get(13)); + return columnStatisticBuilder.build(); } public static boolean isAlmostUnique(double ndv, double rowCount) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java index fba1a4d7b72..ec98ee5af15 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java @@ -18,7 +18,6 @@ package org.apache.doris.statistics; import org.apache.doris.catalog.TableIf; -import org.apache.doris.qe.InternalQueryExecutionException; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.logging.log4j.LogManager; @@ -33,21 +32,14 @@ public class ColumnStatisticsCacheLoader extends 
BasicAsyncCacheLoader<Statistic @Override protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) { - Optional<ColumnStatistic> columnStatistic = Optional.empty(); + Optional<ColumnStatistic> columnStatistic; try { // Load from statistics table. columnStatistic = loadFromStatsTable(key); if (!columnStatistic.isPresent()) { // Load from data source metadata - try { - TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId); - columnStatistic = table.getColumnStatistic(key.colName); - } catch (Exception e) { - if (LOG.isDebugEnabled()) { - LOG.debug("Exception to get column statistics by metadata. [Catalog:{}, DB:{}, Table:{}]", - key.catalogId, key.dbId, key.tableId, e); - } - } + TableIf table = StatisticsUtil.findTable(key.catalogId, key.dbId, key.tableId); + columnStatistic = table.getColumnStatistic(key.colName); } } catch (Throwable t) { LOG.warn("Failed to load stats for column [Catalog:{}, DB:{}, Table:{}, Column:{}], Reason: {}", @@ -55,6 +47,7 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic if (LOG.isDebugEnabled()) { LOG.debug(t); } + return null; } if (columnStatistic.isPresent()) { // For non-empty table, return UNKNOWN if we can't collect ndv value. @@ -68,22 +61,9 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic } private Optional<ColumnStatistic> loadFromStatsTable(StatisticsCacheKey key) { - List<ResultRow> columnResults; - try { - columnResults = StatisticsRepository.loadColStats( - key.catalogId, key.dbId, key.tableId, key.idxId, key.colName); - } catch (InternalQueryExecutionException e) { - LOG.info("Failed to load stats for table {} column {}. 
Reason:{}", - key.tableId, key.colName, e.getMessage()); - return Optional.empty(); - } - ColumnStatistic columnStatistics; - try { - columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResults); - } catch (Exception e) { - LOG.warn("Exception to deserialize column statistics", e); - return Optional.empty(); - } + List<ResultRow> columnResults + = StatisticsRepository.loadColStats(key.catalogId, key.dbId, key.tableId, key.idxId, key.colName); + ColumnStatistic columnStatistics = StatisticsUtil.deserializeToColumnStatistics(columnResults); if (columnStatistics == null) { return Optional.empty(); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java index d755392e79f..eebe910d8b0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatistic.java @@ -29,6 +29,7 @@ import org.apache.logging.log4j.Logger; import java.io.ByteArrayInputStream; import java.io.DataInputStream; +import java.io.IOException; import java.util.Base64; import java.util.List; import java.util.StringJoiner; @@ -76,7 +77,7 @@ public class PartitionColumnStatistic { this.updatedTime = updatedTime; } - public static PartitionColumnStatistic fromResultRow(List<ResultRow> resultRows) { + public static PartitionColumnStatistic fromResultRow(List<ResultRow> resultRows) throws IOException { if (resultRows == null || resultRows.isEmpty()) { return PartitionColumnStatistic.UNKNOWN; } @@ -90,80 +91,68 @@ public class PartitionColumnStatistic { stringJoiner.toString()); return PartitionColumnStatistic.UNKNOWN; } - try { - return fromResultRow(resultRows.get(0)); - } catch (Throwable t) { - if (LOG.isDebugEnabled()) { - LOG.debug("Failed to deserialize column stats", t); - } - return PartitionColumnStatistic.UNKNOWN; - } + return 
fromResultRow(resultRows.get(0)); } - public static PartitionColumnStatistic fromResultRow(ResultRow row) { + public static PartitionColumnStatistic fromResultRow(ResultRow row) throws IOException { // row : [catalog_id, db_id, tbl_id, idx_id, part_name, col_id, // count, ndv, null_count, min, max, data_size, update_time] - try { - long catalogId = Long.parseLong(row.get(0)); - long dbID = Long.parseLong(row.get(1)); - long tblId = Long.parseLong(row.get(2)); - long idxId = Long.parseLong(row.get(3)); - String colName = row.get(5); - Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName); - if (col == null) { - LOG.info("Failed to deserialize column statistics, ctlId: {} dbId: {}, " - + "tblId: {} column: {} not exists", catalogId, dbID, tblId, colName); - return PartitionColumnStatistic.UNKNOWN; - } + long catalogId = Long.parseLong(row.get(0)); + long dbID = Long.parseLong(row.get(1)); + long tblId = Long.parseLong(row.get(2)); + long idxId = Long.parseLong(row.get(3)); + String colName = row.get(5); + Column col = StatisticsUtil.findColumn(catalogId, dbID, tblId, idxId, colName); + if (col == null) { + LOG.info("Failed to deserialize column statistics, ctlId: {} dbId: {}, " + + "tblId: {} column: {} not exists", catalogId, dbID, tblId, colName); + return PartitionColumnStatistic.UNKNOWN; + } - PartitionColumnStatisticBuilder partitionStatisticBuilder = new PartitionColumnStatisticBuilder(); - double count = Double.parseDouble(row.get(6)); - partitionStatisticBuilder.setCount(count); - String ndv = row.get(7); - Base64.Decoder decoder = Base64.getDecoder(); - DataInputStream dis = new DataInputStream(new ByteArrayInputStream(decoder.decode(ndv))); - Hll hll = new Hll(); - if (!hll.deserialize(dis)) { - LOG.warn("Failed to deserialize ndv. 
[{}]", row); - return PartitionColumnStatistic.UNKNOWN; - } - partitionStatisticBuilder.setNdv(Hll128.fromHll(hll)); - String nullCount = row.getWithDefault(8, "0"); - partitionStatisticBuilder.setNumNulls(Double.parseDouble(nullCount)); - partitionStatisticBuilder.setDataSize(Double - .parseDouble(row.getWithDefault(11, "0"))); - partitionStatisticBuilder.setAvgSizeByte(partitionStatisticBuilder.getCount() == 0 - ? 0 : partitionStatisticBuilder.getDataSize() - / partitionStatisticBuilder.getCount()); - String min = row.get(9); - String max = row.get(10); - if (min != null && !"NULL".equalsIgnoreCase(min)) { - try { - partitionStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min)); - partitionStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min)); - } catch (AnalysisException e) { - LOG.warn("Failed to deserialize column {} min value {}.", col, min, e); - partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); - } - } else { + PartitionColumnStatisticBuilder partitionStatisticBuilder = new PartitionColumnStatisticBuilder(); + double count = Double.parseDouble(row.get(6)); + partitionStatisticBuilder.setCount(count); + String ndv = row.get(7); + Base64.Decoder decoder = Base64.getDecoder(); + DataInputStream dis = new DataInputStream(new ByteArrayInputStream(decoder.decode(ndv))); + Hll hll = new Hll(); + if (!hll.deserialize(dis)) { + LOG.warn("Failed to deserialize ndv. [{}]", row); + return PartitionColumnStatistic.UNKNOWN; + } + partitionStatisticBuilder.setNdv(Hll128.fromHll(hll)); + String nullCount = row.getWithDefault(8, "0"); + partitionStatisticBuilder.setNumNulls(Double.parseDouble(nullCount)); + partitionStatisticBuilder.setDataSize(Double + .parseDouble(row.getWithDefault(11, "0"))); + partitionStatisticBuilder.setAvgSizeByte(partitionStatisticBuilder.getCount() == 0 + ? 
0 : partitionStatisticBuilder.getDataSize() + / partitionStatisticBuilder.getCount()); + String min = row.get(9); + String max = row.get(10); + if (min != null && !"NULL".equalsIgnoreCase(min)) { + try { + partitionStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min)); + partitionStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min)); + } catch (AnalysisException e) { + LOG.warn("Failed to deserialize column {} min value {}.", col, min, e); partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); } - if (max != null && !"NULL".equalsIgnoreCase(max)) { - try { - partitionStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max)); - partitionStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max)); - } catch (AnalysisException e) { - LOG.warn("Failed to deserialize column {} max value {}.", col, max, e); - partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); - } - } else { + } else { + partitionStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); + } + if (max != null && !"NULL".equalsIgnoreCase(max)) { + try { + partitionStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max)); + partitionStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max)); + } catch (AnalysisException e) { + LOG.warn("Failed to deserialize column {} max value {}.", col, max, e); partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); } - partitionStatisticBuilder.setUpdatedTime(row.get(12)); - return partitionStatisticBuilder.build(); - } catch (Exception e) { - LOG.warn("Failed to deserialize column statistics. 
Row [{}]", row, e); - return PartitionColumnStatistic.UNKNOWN; + } else { + partitionStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); } + partitionStatisticBuilder.setUpdatedTime(row.get(12)); + return partitionStatisticBuilder.build(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java index c365f6b1a74..7154ad2e819 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java @@ -17,12 +17,12 @@ package org.apache.doris.statistics; -import org.apache.doris.qe.InternalQueryExecutionException; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.io.IOException; import java.util.List; import java.util.Optional; @@ -33,7 +33,7 @@ public class PartitionColumnStatisticCacheLoader extends @Override protected Optional<PartitionColumnStatistic> doLoad(PartitionColumnStatisticCacheKey key) { - Optional<PartitionColumnStatistic> partitionStatistic = Optional.empty(); + Optional<PartitionColumnStatistic> partitionStatistic; try { partitionStatistic = loadFromPartitionStatsTable(key); } catch (Throwable t) { @@ -42,6 +42,7 @@ public class PartitionColumnStatisticCacheLoader extends if (LOG.isDebugEnabled()) { LOG.debug(t); } + return null; } if (partitionStatistic.isPresent()) { // For non-empty table, return UNKNOWN if we can't collect ndv value. 
@@ -54,24 +55,11 @@ public class PartitionColumnStatisticCacheLoader extends return partitionStatistic; } - private Optional<PartitionColumnStatistic> loadFromPartitionStatsTable(PartitionColumnStatisticCacheKey key) { - List<ResultRow> partitionResults; - try { - String partName = "'" + StatisticsUtil.escapeSQL(key.partId) + "'"; - partitionResults = StatisticsRepository.loadPartitionColumnStats( + private Optional<PartitionColumnStatistic> loadFromPartitionStatsTable(PartitionColumnStatisticCacheKey key) + throws IOException { + String partName = "'" + StatisticsUtil.escapeSQL(key.partId) + "'"; + List<ResultRow> partitionResults = StatisticsRepository.loadPartitionColumnStats( key.catalogId, key.dbId, key.tableId, key.idxId, partName, key.colName); - } catch (InternalQueryExecutionException e) { - LOG.info("Failed to load stats for table {} column {}. Reason:{}", - key.tableId, key.colName, e.getMessage()); - return Optional.empty(); - } - PartitionColumnStatistic partitionStatistic; - try { - partitionStatistic = StatisticsUtil.deserializeToPartitionStatistics(partitionResults); - } catch (Exception e) { - LOG.warn("Exception to deserialize partition statistics", e); - return Optional.empty(); - } - return Optional.ofNullable(partitionStatistic); + return Optional.ofNullable(StatisticsUtil.deserializeToPartitionStatistics(partitionResults)); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 6202d40f1d9..76fb22a60e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -126,11 +126,20 @@ public class StatisticsRepository { public static ColumnStatistic queryColumnStatisticsByName( long ctlId, long dbId, long tableId, long indexId, String colName) { + ColumnStatistic columnStatistic = ColumnStatistic.UNKNOWN; 
ResultRow resultRow = queryColumnStatisticById(ctlId, dbId, tableId, indexId, colName); if (resultRow == null) { - return ColumnStatistic.UNKNOWN; + return columnStatistic; } - return ColumnStatistic.fromResultRow(resultRow); + try { + columnStatistic = ColumnStatistic.fromResultRow(resultRow); + } catch (Exception e) { + LOG.warn("Failed to deserialize column statistics. reason: [{}]. Row [{}]", e.getMessage(), resultRow); + if (LOG.isDebugEnabled()) { + LOG.debug(e); + } + } + return columnStatistic; } public static List<ResultRow> queryColumnStatisticsByPartitions(TableIf table, Set<String> columnNames, diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 7937040d323..0f5c81b1cf0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -180,8 +180,7 @@ public class StatisticsUtil { } } - public static ColumnStatistic deserializeToColumnStatistics(List<ResultRow> resultBatches) - throws Exception { + public static ColumnStatistic deserializeToColumnStatistics(List<ResultRow> resultBatches) { if (CollectionUtils.isEmpty(resultBatches)) { return null; } @@ -192,7 +191,8 @@ public class StatisticsUtil { return resultBatches.stream().map(Histogram::fromResultRow).collect(Collectors.toList()); } - public static PartitionColumnStatistic deserializeToPartitionStatistics(List<ResultRow> resultBatches) { + public static PartitionColumnStatistic deserializeToPartitionStatistics(List<ResultRow> resultBatches) + throws IOException { if (CollectionUtils.isEmpty(resultBatches)) { return null; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java index 729291d5323..9c0b7fff33c 100644 --- 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java @@ -388,4 +388,48 @@ public class CacheTest extends TestWithFeService { Thread.sleep(100); Assertions.assertEquals(1, columnStatisticsCache.synchronous().asMap().size()); } + + @Test + public void testLoadWithException() throws Exception { + new MockUp<ColumnStatisticsCacheLoader>() { + @Mock + protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) { + return null; + } + }; + StatisticsCache statisticsCache = new StatisticsCache(); + ColumnStatistic columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, "col"); + Thread.sleep(3000); + Assertions.assertTrue(columnStatistic.isUnKnown); + + new MockUp<ColumnStatisticsCacheLoader>() { + @Mock + protected Optional<ColumnStatistic> doLoad(StatisticsCacheKey key) { + return Optional.of(new ColumnStatistic(1, 2, + null, 3, 4, 5, 6, 7, + null, null, false, + new Date().toString())); + } + }; + columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, "col"); + for (int i = 0; i < 60; i++) { + columnStatistic = statisticsCache.getColumnStatistics(1, 1, 1, -1, "col"); + if (columnStatistic != ColumnStatistic.UNKNOWN) { + break; + } + System.out.println("Not ready yet."); + Thread.sleep(1000); + } + if (columnStatistic != ColumnStatistic.UNKNOWN) { + Assertions.assertEquals(1, columnStatistic.count); + Assertions.assertEquals(2, columnStatistic.ndv); + Assertions.assertEquals(3, columnStatistic.avgSizeByte); + Assertions.assertEquals(4, columnStatistic.numNulls); + Assertions.assertEquals(5, columnStatistic.dataSize); + Assertions.assertEquals(6, columnStatistic.minValue); + Assertions.assertEquals(7, columnStatistic.maxValue); + } else { + Assertions.fail("Column stats is still unknown"); + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, 
e-mail: commits-h...@doris.apache.org