This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 75b47b7189bf455ebe5c5dba9ebcb5686b838c12 Author: minghong <engle...@gmail.com> AuthorDate: Thu Apr 18 11:17:00 2024 +0800 [opt](nereids)clear min/max column stats if table is partially analyzed (#33685) --- .../doris/nereids/stats/StatsCalculator.java | 56 +++++++++++++--------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 57a79037d80..34248d5a55a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -123,6 +123,7 @@ import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.util.PlanUtils; import org.apache.doris.qe.ConnectContext; +import org.apache.doris.statistics.AnalysisManager; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.Histogram; @@ -130,6 +131,7 @@ import org.apache.doris.statistics.StatisticConstants; import org.apache.doris.statistics.StatisticRange; import org.apache.doris.statistics.Statistics; import org.apache.doris.statistics.StatisticsBuilder; +import org.apache.doris.statistics.TableStatsMeta; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; @@ -155,8 +157,6 @@ import java.util.stream.Collectors; */ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { public static double DEFAULT_AGGREGATE_RATIO = 0.5; - public static double DEFAULT_AGGREGATE_EXPAND_RATIO = 1.05; - public static double AGGREGATE_COLUMN_CORRELATION_COEFFICIENT = 0.75; public static double DEFAULT_COLUMN_NDV_RATIO = 0.5; @@ -762,8 +762,12 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { } } Set<SlotReference> slotSet = slotSetBuilder.build(); - Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>(); + Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap = new HashMap<>(); TableIf table = catalogRelation.getTable(); + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId()); + // rows newly updated after last analyze + long deltaRowCount = tableMeta == null ? 0 : tableMeta.updatedRows.get(); double rowCount = catalogRelation.getTable().getRowCountForNereids(); boolean hasUnknownCol = false; long idxId = -1; @@ -773,6 +777,10 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { idxId = olapScan.getSelectedIndexId(); } } + if (deltaRowCount > 0 && LOG.isDebugEnabled()) { + LOG.debug("{} is partially analyzed, clear min/max values in column stats", + catalogRelation.getTable().getName()); + } for (SlotReference slotReference : slotSet) { String colName = slotReference.getColumn().isPresent() ? slotReference.getColumn().get().getName() @@ -789,40 +797,44 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { } else { cache = getColumnStatistic(table, colName, idxId); } + ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); if (cache.avgSizeByte <= 0) { - cache = new ColumnStatisticBuilder(cache) - .setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize()) - .build(); + colStatsBuilder.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize()); } if (!cache.isUnKnown) { - rowCount = Math.max(rowCount, cache.count); + rowCount = Math.max(rowCount, cache.count + deltaRowCount); } else { hasUnknownCol = true; } if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) { - columnStatisticMap.put(slotReference, cache); + if (deltaRowCount > 0) { + // clear min-max to avoid error estimation + // for example, after yesterday data loaded, user send query about yesterday immediately. + // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer + // estimates the filter result is zero + colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) + .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); + } + columnStatisticBuilderMap.put(slotReference, colStatsBuilder); } else { - columnStatisticMap.put(slotReference, ColumnStatistic.UNKNOWN); + columnStatisticBuilderMap.put(slotReference, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN)); hasUnknownCol = true; } } if (hasUnknownCol && ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) { ConnectContext.get().getStatementContext().setHasUnknownColStats(true); } - Statistics stats = new Statistics(rowCount, columnStatisticMap); - stats = normalizeCatalogRelationColumnStatsRowCount(stats); - return stats; - } - - private Statistics normalizeCatalogRelationColumnStatsRowCount(Statistics stats) { - for (Expression slot : stats.columnStatistics().keySet()) { - ColumnStatistic colStats = stats.findColumnStatistics(slot); - Preconditions.checkArgument(colStats != null, - "can not find col stats for %s in table", slot.toSql()); - stats.addColumnStats(slot, - new ColumnStatisticBuilder(colStats).setCount(stats.getRowCount()).build()); + return normalizeCatalogRelationColumnStatsRowCount(rowCount, columnStatisticBuilderMap); + } + + private Statistics normalizeCatalogRelationColumnStatsRowCount(double rowCount, + Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap) { + Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>(); + for (Expression slot : columnStatisticBuilderMap.keySet()) { + columnStatisticMap.put(slot, + columnStatisticBuilderMap.get(slot).setCount(rowCount).build()); } - return stats; + return new Statistics(rowCount, columnStatisticMap); } private Statistics computeTopN(TopN topN) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org