This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 29eab1ae532 [feat](Nereids) after partition prune, output rows of scan node only contains rows from selected partitions (#36760) 29eab1ae532 is described below commit 29eab1ae532352e708832999aadc558a9165c245 Author: minghong <engle...@gmail.com> AuthorDate: Wed Jun 26 19:54:45 2024 +0800 [feat](Nereids) after partition prune, output rows of scan node only contains rows from selected partitions (#36760) 1. update rowcount if some partitions are pruned 2. refactor StatsCalcualtor for Scan --- .../org/apache/doris/nereids/StatementContext.java | 12 +- .../doris/nereids/rules/rewrite/ColumnPruning.java | 2 +- .../doris/nereids/stats/StatsCalculator.java | 391 +++++++++++++-------- .../nereids/trees/plans/algebra/OlapScan.java | 6 + .../logical/LogicalDeferMaterializeOlapScan.java | 5 + .../trees/plans/logical/LogicalOlapScan.java | 8 + .../physical/PhysicalDeferMaterializeOlapScan.java | 5 + .../trees/plans/physical/PhysicalOlapScan.java | 8 + .../apache/doris/statistics/StatisticsBuilder.java | 7 +- .../nereids_p0/stats/partition_col_stats.groovy | 2 +- 10 files changed, 297 insertions(+), 149 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index 58ccaae34d0..e79f079129d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -18,7 +18,6 @@ package org.apache.doris.nereids; import org.apache.doris.analysis.StatementBase; -import org.apache.doris.catalog.Column; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.constraint.TableIdentifier; import org.apache.doris.common.Id; @@ -32,6 +31,7 @@ import org.apache.doris.nereids.trees.expressions.ExprId; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Placeholder; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator; import org.apache.doris.nereids.trees.plans.ObjectId; import org.apache.doris.nereids.trees.plans.PlaceholderId; @@ -135,7 +135,7 @@ public class StatementContext implements Closeable { private final Map<Slot, Relation> slotToRelation = Maps.newHashMap(); // the columns in Plan.getExpressions(), such as columns in join condition or filter condition, group by expression - private final Set<Column> keyColumns = Sets.newHashSet(); + private final Set<SlotReference> keySlots = Sets.newHashSet(); private BitSet disableRules; // table locks @@ -516,12 +516,12 @@ public class StatementContext implements Closeable { } } - public void addKeyColumn(Column column) { - keyColumns.add(column); + public void addKeySlot(SlotReference slot) { + keySlots.add(slot); } - public boolean isKeyColumn(Column column) { - return keyColumns.contains(column); + public boolean isKeySlot(SlotReference slot) { + return keySlots.contains(slot); } /** Get table id with lazy */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java index e8d4c6d96ab..20a91ca5657 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java @@ -138,7 +138,7 @@ public class ColumnPruning extends DefaultPlanRewriter<PruneContext> implements if (stmtContext != null) { for (Slot key : keys) { if (key instanceof SlotReference) { - ((SlotReference) key).getColumn().ifPresent(stmtContext::addKeyColumn); + stmtContext.addKeySlot((SlotReference) key); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index a96ec287f76..bf78fe2a0bf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -18,7 +18,9 @@ package org.apache.doris.nereids.stats; import org.apache.doris.analysis.IntLiteral; +import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MTMV; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.FeConstants; @@ -299,15 +301,212 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { return computeFilter(filter); } + /** + * returns the sum of deltaRowCount for all selected partitions or for the table. + */ + private long computeDeltaRowCount(OlapScan olapScan, SlotReference slot) { + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapScan.getTable().getId()); + + long deltaRowCount = 0; + if (tableMeta != null) { + ColStatsMeta colMeta = tableMeta.findColumnStatsMeta( + olapScan.getTable().getIndexNameById(olapScan.getSelectedIndexId()), slot.getName()); + if (colMeta != null) { + if (olapScan.getSelectedPartitionIds().isEmpty()) { + deltaRowCount = tableMeta.updatedRows.get() - colMeta.updatedRows; + } else { + // sum partition delta row + for (long partitionId : olapScan.getSelectedPartitionIds()) { + deltaRowCount += tableMeta.partitionUpdateRows.getOrDefault(partitionId, 0L) + - colMeta.partitionUpdateRows.getOrDefault(partitionId, 0L); + } + } + } + } + return deltaRowCount; + } + + private void adjustColStats(CatalogRelation catalogRelation, SlotReference slot, + ColumnStatisticBuilder builder) { + if (builder.getAvgSizeByte() <= 0) { + builder.setAvgSizeByte(slot.getDataType().toCatalogDataType().getSlotSize()); + } + if (catalogRelation instanceof OlapScan) { + OlapScan olapScan = (OlapScan) catalogRelation; + long delta = computeDeltaRowCount(olapScan, slot); + if (delta > 0) { + builder.setCount(builder.getCount() + delta); + // clear min-max to avoid error estimation + // for example, after yesterday data loaded, user send query about yesterday immediately. + // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer + // estimates the filter result is zero + builder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) + .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); + } + } + } + + private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRelation, SlotReference slot) { + long idxId = -1; + if (catalogRelation instanceof OlapScan) { + idxId = ((OlapScan) catalogRelation).getSelectedIndexIdForMV(); + } + return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId); + } + + private ColumnStatistic getColumnStatsFromPartitionCache(CatalogRelation catalogRelation, SlotReference slot, + List<String> partitionNames) { + long idxId = -1; + if (catalogRelation instanceof OlapScan) { + idxId = ((OlapScan) catalogRelation).getSelectedIndexIdForMV(); + } + return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId, partitionNames); + } + + private long getSelectedPartitionRowCount(OlapScan olapScan) { + long partRowCountSum = 0; + for (long id : olapScan.getSelectedPartitionIds()) { + long partRowCount = olapScan.getTable().getPartition(id).getBaseIndex().getRowCount(); + // if we cannot get any partition's rowCount, return -1 to fallback to table level stats + if (partRowCount <= 0) { + return -1; + } + partRowCountSum += partRowCount; + } + return partRowCountSum; + } + + private void setHasUnknownColStatsInStatementContext() { + if (ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) { + ConnectContext.get().getStatementContext().setHasUnknownColStats(true); + } + } + + private void checkIfUnknownStatsUsedAsKey(StatisticsBuilder builder) { + if (ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) { + for (Map.Entry<Expression, ColumnStatistic> entry : builder.getExpressionColumnStatsEntries()) { + if (entry.getKey() instanceof SlotReference + && ConnectContext.get().getStatementContext().isKeySlot((SlotReference) entry.getKey())) { + if (entry.getValue().isUnKnown) { + ConnectContext.get().getStatementContext().setHasUnknownColStats(true); + break; + } + } + } + } + } + + private Statistics computeOlapScan(LogicalOlapScan olapScan) { + OlapTable olapTable = olapScan.getTable(); + + if (olapScan.getSelectedIndexId() != olapScan.getTable().getBaseIndexId() || olapTable instanceof MTMV) { + // mv is selected, return its estimated stats + Optional<Statistics> optStats = cascadesContext.getStatementContext() + .getStatistics(olapScan.getRelationId()); + if (optStats.isPresent()) { + double actualRowCount = olapScan.getTable().getRowCountForNereids(); + // if estimated mv rowCount is more than actual row count, fall back to base table stats + if (actualRowCount > optStats.get().getRowCount()) { + return optStats.get(); + } + } + } + + StatisticsBuilder builder = new StatisticsBuilder(); + + // for system table or FeUt, use ColumnStatistic.UNKNOWN + if (StatisticConstants.isSystemTable(olapTable) || !FeConstants.enableInternalSchemaDb + || ConnectContext.get() == null + || ConnectContext.get().getSessionVariable().internalSession) { + for (Slot slot : olapScan.getOutput()) { + builder.putColumnStatistics(slot, ColumnStatistic.UNKNOWN); + } + setHasUnknownColStatsInStatementContext(); + builder.setRowCount(olapTable.getRowCountForNereids()); + return builder.build(); + } + + // for regression shape test, get row count from columnStats.count + if (ConnectContext.get() == null || !ConnectContext.get().getSessionVariable().enableStats) { + // get row count from any visible slotReference's colStats + double rowCount = 1; + for (Slot slot : olapScan.getOutput()) { + if (isVisibleSlotReference(slot)) { + ColumnStatistic cache = getColumnStatistic(olapTable, slot.getName(), + olapScan.getSelectedIndexIdForMV()); + rowCount = Math.max(rowCount, cache.count); + } + builder.putColumnStatistics(slot, + new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN).setCount(rowCount).build()); + } + setHasUnknownColStatsInStatementContext(); + return builder.setRowCount(rowCount).build(); + } + + // build Stats for olapScan + // if slot is not slotReference or is invisible, use UNKNOWN + List<SlotReference> outputSlotReferences = new ArrayList<>(); + for (Slot slot : olapScan.getOutput()) { + if (isVisibleSlotReference(slot)) { + outputSlotReferences.add((SlotReference) slot); + } else { + builder.putColumnStatistics(slot, ColumnStatistic.UNKNOWN); + } + } + // build col stats for outputSlotReferences + if (!olapScan.getSelectedPartitionIds().isEmpty()) { + double rowCount = getSelectedPartitionRowCount(olapScan); + // if partition row count is not available, fallback to table stats + if (rowCount > 0) { + List<String> selectedPartitionNames = new ArrayList<>(olapScan.getSelectedPartitionIds().size()); + olapScan.getSelectedPartitionIds().forEach(id -> { + selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName()); + }); + for (SlotReference slot : outputSlotReferences) { + ColumnStatistic cache = getColumnStatsFromPartitionCache(olapScan, slot, selectedPartitionNames); + ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); + adjustColStats(olapScan, slot, colStatsBuilder); + builder.putColumnStatistics(slot, colStatsBuilder.build()); + rowCount = Math.max(rowCount, colStatsBuilder.getCount()); + } + checkIfUnknownStatsUsedAsKey(builder); + return builder.setRowCount(rowCount).build(); + } + } + + // get table level stats + double rowCount = olapScan.getTable().getRowCountForNereids(); + for (SlotReference slot : outputSlotReferences) { + ColumnStatistic cache = getColumnStatsFromTableCache(olapScan, slot); + ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); + adjustColStats(olapScan, slot, colStatsBuilder); + builder.putColumnStatistics(slot, colStatsBuilder.build()); + rowCount = Math.max(rowCount, colStatsBuilder.getCount()); + } + checkIfUnknownStatsUsedAsKey(builder); + return builder.setRowCount(rowCount).build(); + } + @Override public Statistics visitLogicalOlapScan(LogicalOlapScan olapScan, Void context) { - return computeCatalogRelation(olapScan); + return computeOlapScan(olapScan); + } + + private boolean isVisibleSlotReference(Slot slot) { + if (slot instanceof SlotReference) { + Optional<Column> colOpt = ((SlotReference) slot).getColumn(); + if (colOpt.isPresent()) { + return colOpt.get().isVisible(); + } + } + return false; } @Override public Statistics visitLogicalDeferMaterializeOlapScan(LogicalDeferMaterializeOlapScan deferMaterializeOlapScan, Void context) { - return computeCatalogRelation(deferMaterializeOlapScan.getLogicalOlapScan()); + return computeOlapScan(deferMaterializeOlapScan.getLogicalOlapScan()); } @Override @@ -726,6 +925,30 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { } } + private ColumnStatistic getColumnStatistic(TableIf table, String colName, long idxId) { + ConnectContext connectContext = ConnectContext.get(); + if (connectContext != null && connectContext.getSessionVariable().internalSession) { + return ColumnStatistic.UNKNOWN; + } + long catalogId; + long dbId; + try { + catalogId = table.getDatabase().getCatalog().getId(); + dbId = table.getDatabase().getId(); + } catch (Exception e) { + // Use -1 for catalog id and db id when failed to get them from metadata. + // This is OK because catalog id and db id is not in the hashcode function of ColumnStatistics cache + // and the table id is globally unique. + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Fail to get catalog id and db id for table %s", table.getName())); + } + catalogId = -1; + dbId = -1; + } + return Env.getCurrentEnv().getStatisticsCache().getColumnStatistics( + catalogId, dbId, table.getId(), idxId, colName); + } + private ColumnStatistic getColumnStatistic(TableIf table, String colName, long idxId, List<String> partitionNames) { ConnectContext connectContext = ConnectContext.get(); if (connectContext != null && connectContext.getSessionVariable().internalSession) { @@ -757,6 +980,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { PartitionColumnStatisticBuilder builder = new PartitionColumnStatisticBuilder(); boolean hasUnknown = false; // check if there is any unknown stats to avoid unnecessary partition column stats merge. + List<PartitionColumnStatistic> pColStatsLists = new ArrayList<>(partitionNames.size()); for (String partitionName : partitionNames) { PartitionColumnStatistic pcolStats = Env.getCurrentEnv().getStatisticsCache() .getPartitionColumnStatistics( @@ -764,19 +988,14 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { if (pcolStats.isUnKnown) { hasUnknown = true; break; + } else { + pColStatsLists.add(pcolStats); } } if (!hasUnknown) { boolean isFirst = true; // try to merge partition column stats - for (String partitionName : partitionNames) { - PartitionColumnStatistic pcolStats = Env.getCurrentEnv().getStatisticsCache() - .getPartitionColumnStatistics( - catalogId, dbId, table.getId(), idxId, partitionName, colName); - if (pcolStats.isUnKnown) { - hasUnknown = true; - break; - } + for (PartitionColumnStatistic pcolStats : pColStatsLists) { if (isFirst) { builder = new PartitionColumnStatisticBuilder(pcolStats); isFirst = false; @@ -784,9 +1003,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { builder.merge(pcolStats); } } - if (!hasUnknown) { - return builder.toColumnStatistics(); - } + return builder.toColumnStatistics(); } } // if any partition-col-stats is unknown, fall back to table level col stats @@ -795,23 +1012,21 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { } } - // TODO: 1. Subtract the pruned partition - // 2. Consider the influence of runtime filter - // 3. Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now. + /** + * compute stats for catalogRelations except OlapScan + */ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { - if (catalogRelation instanceof LogicalOlapScan) { - LogicalOlapScan olap = (LogicalOlapScan) catalogRelation; - if (olap.getSelectedIndexId() != olap.getTable().getBaseIndexId()) { - // mv is selected, return its estimated stats - Optional<Statistics> optStats = cascadesContext.getStatementContext() - .getStatistics(olap.getRelationId()); - if (optStats.isPresent()) { - double actualRowCount = catalogRelation.getTable().getRowCountForNereids(); - if (actualRowCount > optStats.get().getRowCount()) { - return optStats.get(); - } - } + StatisticsBuilder builder = new StatisticsBuilder(); + // for FeUt, use ColumnStatistic.UNKNOWN + if (!FeConstants.enableInternalSchemaDb + || ConnectContext.get() == null + || ConnectContext.get().getSessionVariable().internalSession) { + builder.setRowCount(catalogRelation.getTable().getRowCountForNereids()); + for (Slot slot : catalogRelation.getOutput()) { + builder.putColumnStatistics(slot, ColumnStatistic.UNKNOWN); } + setHasUnknownColStatsInStatementContext(); + return builder.build(); } List<Slot> output = catalogRelation.getOutput(); @@ -822,121 +1037,17 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { } } Set<SlotReference> slotSet = slotSetBuilder.build(); - Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap = new HashMap<>(); - TableIf table = catalogRelation.getTable(); - AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); - TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(table.getId()); - long tableUpdatedRows = tableMeta == null ? 0 : tableMeta.updatedRows.get(); - boolean hasUnknownKeyCol = false; - long idxId = -1; - List<String> selectedPartitionNames; - if (catalogRelation instanceof OlapScan) { - OlapScan olapScan = (OlapScan) catalogRelation; - if (olapScan.getTable().getBaseIndexId() != olapScan.getSelectedIndexId()) { - idxId = olapScan.getSelectedIndexId(); - } - selectedPartitionNames = new ArrayList<>(olapScan.getSelectedPartitionIds().size()); - olapScan.getSelectedPartitionIds().forEach(id -> { - selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName()); - }); - } else { - selectedPartitionNames = new ArrayList<>(); - } - double rowCount = 0.0; - for (SlotReference slotReference : slotSet) { - boolean usedAsKey = false; - if (ConnectContext.get() != null && slotReference.getColumn().isPresent() - && ConnectContext.get().getStatementContext() != null) { - usedAsKey = ConnectContext.get().getStatementContext().isKeyColumn(slotReference.getColumn().get()); - } - String colName = slotReference.getColumn().isPresent() - ? slotReference.getColumn().get().getName() - : slotReference.getName(); - boolean shouldIgnoreThisCol = StatisticConstants.shouldIgnoreCol(table, slotReference.getColumn().get()); - if (colName == null) { - throw new RuntimeException(String.format("Invalid slot: %s", slotReference.getExprId())); - } - // compute delta row - long deltaRowCount = 0; - if (catalogRelation instanceof OlapScan) { - OlapTable olapTable = (OlapTable) table; - if (tableMeta != null) { - ColStatsMeta colMeta = tableMeta.findColumnStatsMeta( - olapTable.getIndexNameById(idxId == -1 ? olapTable.getBaseIndexId() : idxId), colName); - if (colMeta != null) { - if (((OlapScan) catalogRelation).getSelectedPartitionIds().isEmpty()) { - deltaRowCount = tableUpdatedRows - colMeta.updatedRows; - } else { - // sum partition delta row - for (long partitionId : ((OlapScan) catalogRelation).getSelectedPartitionIds()) { - deltaRowCount += tableMeta.partitionUpdateRows.getOrDefault(partitionId, 0L) - - colMeta.partitionUpdateRows.getOrDefault(partitionId, 0L); - } - } - } - } - - } - ColumnStatistic cache; - if (!FeConstants.enableInternalSchemaDb - || shouldIgnoreThisCol) { - cache = ColumnStatistic.UNKNOWN; - } else { - cache = getColumnStatistic(table, colName, idxId, selectedPartitionNames); - } + double rowCount = catalogRelation.getTable().getRowCountForNereids(); + for (SlotReference slot : slotSet) { + ColumnStatistic cache = getColumnStatsFromTableCache(catalogRelation, slot); ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); - if (cache.avgSizeByte <= 0) { - colStatsBuilder.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize()); - } - if (!cache.isUnKnown) { - rowCount = Math.max(rowCount, cache.count + deltaRowCount); - } else { - if (usedAsKey) { - hasUnknownKeyCol = true; - } - } - if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) { - // deltaRowCount > 0 indicates that - // new data is loaded to the table after this column was analyzed last time. - // In this case, need to eliminate min/max value for this column. - if (deltaRowCount > 0) { - // clear min-max to avoid error estimation - // for example, after yesterday data loaded, user send query about yesterday immediately. - // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer - // estimates the filter result is zero - colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) - .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); - if (LOG.isDebugEnabled()) { - LOG.debug("{}.{} is partially analyzed, clear min/max values in column stats", - table.getName(), colName); - } - } - columnStatisticBuilderMap.put(slotReference, colStatsBuilder); - } else { - columnStatisticBuilderMap.put(slotReference, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN)); - hasUnknownKeyCol = true; - } - } - if (rowCount <= 0.0) { - // if we failed to get rowCount from column stats, then try to get it from TableIf - rowCount = catalogRelation.getTable().getRowCountForNereids(); - } - - if (hasUnknownKeyCol && ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) { - ConnectContext.get().getStatementContext().setHasUnknownColStats(true); - } - return normalizeCatalogRelationColumnStatsRowCount(rowCount, columnStatisticBuilderMap); - } - - private Statistics normalizeCatalogRelationColumnStatsRowCount(double rowCount, - Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap) { - Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>(); - for (Expression slot : columnStatisticBuilderMap.keySet()) { - columnStatisticMap.put(slot, - columnStatisticBuilderMap.get(slot).setCount(rowCount).build()); + adjustColStats(catalogRelation, slot, colStatsBuilder); + rowCount = Math.max(rowCount, colStatsBuilder.getCount()); + builder.putColumnStatistics(slot, colStatsBuilder.build()); } - return new Statistics(rowCount, columnStatisticMap); + checkIfUnknownStatsUsedAsKey(builder); + return builder.build(); } private Statistics computeTopN(TopN topN) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/OlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/OlapScan.java index d5fe7c23413..65f36394621 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/OlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/OlapScan.java @@ -28,6 +28,12 @@ public interface OlapScan { long getSelectedIndexId(); + /** + * if this is mv, return selectedIndexId, o.w -1 + * @return -1 or selectedIndexId + */ + long getSelectedIndexIdForMV(); + List<Long> getSelectedPartitionIds(); List<Long> getSelectedTabletIds(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalDeferMaterializeOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalDeferMaterializeOlapScan.java index 77da6537dd7..bff416c19d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalDeferMaterializeOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalDeferMaterializeOlapScan.java @@ -93,6 +93,11 @@ public class LogicalDeferMaterializeOlapScan extends LogicalCatalogRelation impl return logicalOlapScan.getSelectedIndexId(); } + @Override + public long getSelectedIndexIdForMV() { + return logicalOlapScan.getSelectedIndexIdForMV(); + } + @Override public List<Long> getSelectedPartitionIds() { return logicalOlapScan.getSelectedPartitionIds(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java index 65e4710836d..f14cd661d95 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java @@ -333,6 +333,14 @@ public class LogicalOlapScan extends LogicalCatalogRelation implements OlapScan return selectedIndexId; } + @Override + public long getSelectedIndexIdForMV() { + if (getTable().getBaseIndexId() != selectedIndexId) { + return selectedIndexId; + } + return -1; + } + public boolean isIndexSelected() { return indexSelected; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java index f82bd6dbec5..1acfb8dc4c0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java @@ -90,6 +90,11 @@ public class PhysicalDeferMaterializeOlapScan extends PhysicalCatalogRelation im return physicalOlapScan.getSelectedIndexId(); } + @Override + public long getSelectedIndexIdForMV() { + return physicalOlapScan.getSelectedIndexIdForMV(); + } + @Override public List<Long> getSelectedPartitionIds() { return physicalOlapScan.getSelectedPartitionIds(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java index 76713a51e29..1839f46c552 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java @@ -90,6 +90,14 @@ public class PhysicalOlapScan extends PhysicalCatalogRelation implements OlapSca return selectedIndexId; } + @Override + public long getSelectedIndexIdForMV() { + if (getTable().getBaseIndexId() != selectedIndexId) { + return selectedIndexId; + } + return -1; + } + @Override public List<Long> getSelectedTabletIds() { return selectedTabletIds; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java index 53d8f49cb14..29f04f2926e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java @@ -21,11 +21,12 @@ import org.apache.doris.nereids.trees.expressions.Expression; import java.util.HashMap; import java.util.Map; +import java.util.Set; public class StatisticsBuilder { private double rowCount; - private int widthInJoinCluster; + private int widthInJoinCluster = 1; private final Map<Expression, ColumnStatistic> expressionToColumnStats; public StatisticsBuilder() { @@ -60,6 +61,10 @@ public class StatisticsBuilder { return this; } + public Set<Map.Entry<Expression, ColumnStatistic>> getExpressionColumnStatsEntries() { + return expressionToColumnStats.entrySet(); + } + public Statistics build() { return new Statistics(rowCount, widthInJoinCluster, expressionToColumnStats); } diff --git a/regression-test/suites/nereids_p0/stats/partition_col_stats.groovy b/regression-test/suites/nereids_p0/stats/partition_col_stats.groovy index 89a32a80d91..3436b6dd86e 100644 --- a/regression-test/suites/nereids_p0/stats/partition_col_stats.groovy +++ b/regression-test/suites/nereids_p0/stats/partition_col_stats.groovy @@ -36,7 +36,7 @@ suite("partition_col_stats") { """ //run this sql to make stats be cached sql "select * from pt where k1<3;" - sleep(10) + sleep(10000) explain{ sql "physical plan select * from pt where k1<3;" contains("stats=4") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org