This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 29eab1ae532 [feat](Nereids) after partition prune, output rows of scan 
node only contains rows from selected partitions  (#36760)
29eab1ae532 is described below

commit 29eab1ae532352e708832999aadc558a9165c245
Author: minghong <engle...@gmail.com>
AuthorDate: Wed Jun 26 19:54:45 2024 +0800

    [feat](Nereids) after partition prune, output rows of scan node only 
contains rows from selected partitions  (#36760)
    
    1. update rowcount if some partitions are pruned
    2. refactor StatsCalculator for Scan
---
 .../org/apache/doris/nereids/StatementContext.java |  12 +-
 .../doris/nereids/rules/rewrite/ColumnPruning.java |   2 +-
 .../doris/nereids/stats/StatsCalculator.java       | 391 +++++++++++++--------
 .../nereids/trees/plans/algebra/OlapScan.java      |   6 +
 .../logical/LogicalDeferMaterializeOlapScan.java   |   5 +
 .../trees/plans/logical/LogicalOlapScan.java       |   8 +
 .../physical/PhysicalDeferMaterializeOlapScan.java |   5 +
 .../trees/plans/physical/PhysicalOlapScan.java     |   8 +
 .../apache/doris/statistics/StatisticsBuilder.java |   7 +-
 .../nereids_p0/stats/partition_col_stats.groovy    |   2 +-
 10 files changed, 297 insertions(+), 149 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
index 58ccaae34d0..e79f079129d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
@@ -18,7 +18,6 @@
 package org.apache.doris.nereids;
 
 import org.apache.doris.analysis.StatementBase;
-import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.catalog.constraint.TableIdentifier;
 import org.apache.doris.common.Id;
@@ -32,6 +31,7 @@ import org.apache.doris.nereids.trees.expressions.ExprId;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.Placeholder;
 import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator;
 import org.apache.doris.nereids.trees.plans.ObjectId;
 import org.apache.doris.nereids.trees.plans.PlaceholderId;
@@ -135,7 +135,7 @@ public class StatementContext implements Closeable {
     private final Map<Slot, Relation> slotToRelation = Maps.newHashMap();
 
     // the columns in Plan.getExpressions(), such as columns in join condition 
or filter condition, group by expression
-    private final Set<Column> keyColumns = Sets.newHashSet();
+    private final Set<SlotReference> keySlots = Sets.newHashSet();
     private BitSet disableRules;
 
     // table locks
@@ -516,12 +516,12 @@ public class StatementContext implements Closeable {
         }
     }
 
-    public void addKeyColumn(Column column) {
-        keyColumns.add(column);
+    public void addKeySlot(SlotReference slot) {
+        keySlots.add(slot);
     }
 
-    public boolean isKeyColumn(Column column) {
-        return keyColumns.contains(column);
+    public boolean isKeySlot(SlotReference slot) {
+        return keySlots.contains(slot);
     }
 
     /** Get table id with lazy */
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java
index e8d4c6d96ab..20a91ca5657 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java
@@ -138,7 +138,7 @@ public class ColumnPruning extends 
DefaultPlanRewriter<PruneContext> implements
             if (stmtContext != null) {
                 for (Slot key : keys) {
                     if (key instanceof SlotReference) {
-                        ((SlotReference) 
key).getColumn().ifPresent(stmtContext::addKeyColumn);
+                        stmtContext.addKeySlot((SlotReference) key);
                     }
                 }
             }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index a96ec287f76..bf78fe2a0bf 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -18,7 +18,9 @@
 package org.apache.doris.nereids.stats;
 
 import org.apache.doris.analysis.IntLiteral;
+import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.MTMV;
 import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.FeConstants;
@@ -299,15 +301,212 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         return computeFilter(filter);
     }
 
+    /**
+     * returns the sum of deltaRowCount for all selected partitions or for the 
table.
+     */
+    private long computeDeltaRowCount(OlapScan olapScan, SlotReference slot) {
+        AnalysisManager analysisManager = 
Env.getCurrentEnv().getAnalysisManager();
+        TableStatsMeta tableMeta = 
analysisManager.findTableStatsStatus(olapScan.getTable().getId());
+
+        long deltaRowCount = 0;
+        if (tableMeta != null) {
+            ColStatsMeta colMeta = tableMeta.findColumnStatsMeta(
+                    
olapScan.getTable().getIndexNameById(olapScan.getSelectedIndexId()), 
slot.getName());
+            if (colMeta != null) {
+                if (olapScan.getSelectedPartitionIds().isEmpty()) {
+                    deltaRowCount = tableMeta.updatedRows.get() - 
colMeta.updatedRows;
+                } else {
+                    // sum partition delta row
+                    for (long partitionId : 
olapScan.getSelectedPartitionIds()) {
+                        deltaRowCount += 
tableMeta.partitionUpdateRows.getOrDefault(partitionId, 0L)
+                                - 
colMeta.partitionUpdateRows.getOrDefault(partitionId, 0L);
+                    }
+                }
+            }
+        }
+        return deltaRowCount;
+    }
+
+    private void adjustColStats(CatalogRelation catalogRelation, SlotReference 
slot,
+            ColumnStatisticBuilder builder) {
+        if (builder.getAvgSizeByte() <= 0) {
+            
builder.setAvgSizeByte(slot.getDataType().toCatalogDataType().getSlotSize());
+        }
+        if (catalogRelation instanceof OlapScan) {
+            OlapScan olapScan = (OlapScan) catalogRelation;
+            long delta = computeDeltaRowCount(olapScan, slot);
+            if (delta > 0) {
+                builder.setCount(builder.getCount() + delta);
+                // clear min-max to avoid error estimation
+                // for example, after yesterday data loaded, user send query 
about yesterday immediately.
+                // since yesterday data are not analyzed, the max date is 
before yesterday, and hence optimizer
+                // estimates the filter result is zero
+                builder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
+                        
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
+            }
+        }
+    }
+
+    private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation 
catalogRelation, SlotReference slot) {
+        long idxId = -1;
+        if (catalogRelation instanceof OlapScan) {
+            idxId = ((OlapScan) catalogRelation).getSelectedIndexIdForMV();
+        }
+        return getColumnStatistic(catalogRelation.getTable(), slot.getName(), 
idxId);
+    }
+
+    private ColumnStatistic getColumnStatsFromPartitionCache(CatalogRelation 
catalogRelation, SlotReference slot,
+            List<String> partitionNames) {
+        long idxId = -1;
+        if (catalogRelation instanceof OlapScan) {
+            idxId = ((OlapScan) catalogRelation).getSelectedIndexIdForMV();
+        }
+        return getColumnStatistic(catalogRelation.getTable(), slot.getName(), 
idxId, partitionNames);
+    }
+
+    private long getSelectedPartitionRowCount(OlapScan olapScan) {
+        long partRowCountSum = 0;
+        for (long id : olapScan.getSelectedPartitionIds()) {
+            long partRowCount = 
olapScan.getTable().getPartition(id).getBaseIndex().getRowCount();
+            // if we cannot get any partition's rowCount, return -1 to 
fallback to table level stats
+            if (partRowCount <= 0) {
+                return -1;
+            }
+            partRowCountSum += partRowCount;
+        }
+        return partRowCountSum;
+    }
+
+    private void setHasUnknownColStatsInStatementContext() {
+        if (ConnectContext.get() != null && 
ConnectContext.get().getStatementContext() != null) {
+            
ConnectContext.get().getStatementContext().setHasUnknownColStats(true);
+        }
+    }
+
+    private void checkIfUnknownStatsUsedAsKey(StatisticsBuilder builder) {
+        if (ConnectContext.get() != null && 
ConnectContext.get().getStatementContext() != null) {
+            for (Map.Entry<Expression, ColumnStatistic> entry : 
builder.getExpressionColumnStatsEntries()) {
+                if (entry.getKey() instanceof SlotReference
+                        && 
ConnectContext.get().getStatementContext().isKeySlot((SlotReference) 
entry.getKey())) {
+                    if (entry.getValue().isUnKnown) {
+                        
ConnectContext.get().getStatementContext().setHasUnknownColStats(true);
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    private Statistics computeOlapScan(LogicalOlapScan olapScan) {
+        OlapTable olapTable = olapScan.getTable();
+
+        if (olapScan.getSelectedIndexId() != 
olapScan.getTable().getBaseIndexId() || olapTable instanceof MTMV) {
+            // mv is selected, return its estimated stats
+            Optional<Statistics> optStats = 
cascadesContext.getStatementContext()
+                    .getStatistics(olapScan.getRelationId());
+            if (optStats.isPresent()) {
+                double actualRowCount = 
olapScan.getTable().getRowCountForNereids();
+                // if estimated mv rowCount is more than actual row count, 
fall back to base table stats
+                if (actualRowCount > optStats.get().getRowCount()) {
+                    return optStats.get();
+                }
+            }
+        }
+
+        StatisticsBuilder builder = new StatisticsBuilder();
+
+        // for system table or FeUt, use ColumnStatistic.UNKNOWN
+        if (StatisticConstants.isSystemTable(olapTable) || 
!FeConstants.enableInternalSchemaDb
+                || ConnectContext.get() == null
+                || ConnectContext.get().getSessionVariable().internalSession) {
+            for (Slot slot : olapScan.getOutput()) {
+                builder.putColumnStatistics(slot, ColumnStatistic.UNKNOWN);
+            }
+            setHasUnknownColStatsInStatementContext();
+            builder.setRowCount(olapTable.getRowCountForNereids());
+            return builder.build();
+        }
+
+        // for regression shape test, get row count from columnStats.count
+        if (ConnectContext.get() == null || 
!ConnectContext.get().getSessionVariable().enableStats) {
+            // get row count from any visible slotReference's colStats
+            double rowCount = 1;
+            for (Slot slot : olapScan.getOutput()) {
+                if (isVisibleSlotReference(slot)) {
+                    ColumnStatistic cache = getColumnStatistic(olapTable, 
slot.getName(),
+                            olapScan.getSelectedIndexIdForMV());
+                    rowCount = Math.max(rowCount, cache.count);
+                }
+                builder.putColumnStatistics(slot,
+                        new 
ColumnStatisticBuilder(ColumnStatistic.UNKNOWN).setCount(rowCount).build());
+            }
+            setHasUnknownColStatsInStatementContext();
+            return builder.setRowCount(rowCount).build();
+        }
+
+        // build Stats for olapScan
+        // if slot is not slotReference or is invisible, use UNKNOWN
+        List<SlotReference> outputSlotReferences = new ArrayList<>();
+        for (Slot slot : olapScan.getOutput()) {
+            if (isVisibleSlotReference(slot)) {
+                outputSlotReferences.add((SlotReference) slot);
+            } else {
+                builder.putColumnStatistics(slot, ColumnStatistic.UNKNOWN);
+            }
+        }
+        // build col stats for outputSlotReferences
+        if (!olapScan.getSelectedPartitionIds().isEmpty()) {
+            double rowCount = getSelectedPartitionRowCount(olapScan);
+            // if partition row count is not available, fallback to table stats
+            if (rowCount > 0) {
+                List<String> selectedPartitionNames = new 
ArrayList<>(olapScan.getSelectedPartitionIds().size());
+                olapScan.getSelectedPartitionIds().forEach(id -> {
+                    
selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName());
+                });
+                for (SlotReference slot : outputSlotReferences) {
+                    ColumnStatistic cache = 
getColumnStatsFromPartitionCache(olapScan, slot, selectedPartitionNames);
+                    ColumnStatisticBuilder colStatsBuilder = new 
ColumnStatisticBuilder(cache);
+                    adjustColStats(olapScan, slot, colStatsBuilder);
+                    builder.putColumnStatistics(slot, colStatsBuilder.build());
+                    rowCount = Math.max(rowCount, colStatsBuilder.getCount());
+                }
+                checkIfUnknownStatsUsedAsKey(builder);
+                return builder.setRowCount(rowCount).build();
+            }
+        }
+
+        // get table level stats
+        double rowCount = olapScan.getTable().getRowCountForNereids();
+        for (SlotReference slot : outputSlotReferences) {
+            ColumnStatistic cache = getColumnStatsFromTableCache(olapScan, 
slot);
+            ColumnStatisticBuilder colStatsBuilder = new 
ColumnStatisticBuilder(cache);
+            adjustColStats(olapScan, slot, colStatsBuilder);
+            builder.putColumnStatistics(slot, colStatsBuilder.build());
+            rowCount = Math.max(rowCount, colStatsBuilder.getCount());
+        }
+        checkIfUnknownStatsUsedAsKey(builder);
+        return builder.setRowCount(rowCount).build();
+    }
+
     @Override
     public Statistics visitLogicalOlapScan(LogicalOlapScan olapScan, Void 
context) {
-        return computeCatalogRelation(olapScan);
+        return computeOlapScan(olapScan);
+    }
+
+    private boolean isVisibleSlotReference(Slot slot) {
+        if (slot instanceof SlotReference) {
+            Optional<Column> colOpt = ((SlotReference) slot).getColumn();
+            if (colOpt.isPresent()) {
+                return colOpt.get().isVisible();
+            }
+        }
+        return false;
     }
 
     @Override
     public Statistics 
visitLogicalDeferMaterializeOlapScan(LogicalDeferMaterializeOlapScan 
deferMaterializeOlapScan,
             Void context) {
-        return 
computeCatalogRelation(deferMaterializeOlapScan.getLogicalOlapScan());
+        return computeOlapScan(deferMaterializeOlapScan.getLogicalOlapScan());
     }
 
     @Override
@@ -726,6 +925,30 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         }
     }
 
+    private ColumnStatistic getColumnStatistic(TableIf table, String colName, 
long idxId) {
+        ConnectContext connectContext = ConnectContext.get();
+        if (connectContext != null && 
connectContext.getSessionVariable().internalSession) {
+            return ColumnStatistic.UNKNOWN;
+        }
+        long catalogId;
+        long dbId;
+        try {
+            catalogId = table.getDatabase().getCatalog().getId();
+            dbId = table.getDatabase().getId();
+        } catch (Exception e) {
+            // Use -1 for catalog id and db id when failed to get them from 
metadata.
+            // This is OK because catalog id and db id is not in the hashcode 
function of ColumnStatistics cache
+            // and the table id is globally unique.
+            if (LOG.isDebugEnabled()) {
+                LOG.debug(String.format("Fail to get catalog id and db id for 
table %s", table.getName()));
+            }
+            catalogId = -1;
+            dbId = -1;
+        }
+        return Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(
+                catalogId, dbId, table.getId(), idxId, colName);
+    }
+
     private ColumnStatistic getColumnStatistic(TableIf table, String colName, 
long idxId, List<String> partitionNames) {
         ConnectContext connectContext = ConnectContext.get();
         if (connectContext != null && 
connectContext.getSessionVariable().internalSession) {
@@ -757,6 +980,7 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                 PartitionColumnStatisticBuilder builder = new 
PartitionColumnStatisticBuilder();
                 boolean hasUnknown = false;
                 // check if there is any unknown stats to avoid unnecessary 
partition column stats merge.
+                List<PartitionColumnStatistic> pColStatsLists = new 
ArrayList<>(partitionNames.size());
                 for (String partitionName : partitionNames) {
                     PartitionColumnStatistic pcolStats = 
Env.getCurrentEnv().getStatisticsCache()
                             .getPartitionColumnStatistics(
@@ -764,19 +988,14 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                     if (pcolStats.isUnKnown) {
                         hasUnknown = true;
                         break;
+                    } else {
+                        pColStatsLists.add(pcolStats);
                     }
                 }
                 if (!hasUnknown) {
                     boolean isFirst = true;
                     // try to merge partition column stats
-                    for (String partitionName : partitionNames) {
-                        PartitionColumnStatistic pcolStats = 
Env.getCurrentEnv().getStatisticsCache()
-                                .getPartitionColumnStatistics(
-                                        catalogId, dbId, table.getId(), idxId, 
partitionName, colName);
-                        if (pcolStats.isUnKnown) {
-                            hasUnknown = true;
-                            break;
-                        }
+                    for (PartitionColumnStatistic pcolStats : pColStatsLists) {
                         if (isFirst) {
                             builder = new 
PartitionColumnStatisticBuilder(pcolStats);
                             isFirst = false;
@@ -784,9 +1003,7 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
                             builder.merge(pcolStats);
                         }
                     }
-                    if (!hasUnknown) {
-                        return builder.toColumnStatistics();
-                    }
+                    return builder.toColumnStatistics();
                 }
             }
             // if any partition-col-stats is unknown, fall back to table level 
col stats
@@ -795,23 +1012,21 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         }
     }
 
-    // TODO: 1. Subtract the pruned partition
-    //       2. Consider the influence of runtime filter
-    //       3. Get NDV and column data size from StatisticManger, 
StatisticManager doesn't support it now.
+    /**
+     * compute stats for catalogRelations except OlapScan
+     */
     private Statistics computeCatalogRelation(CatalogRelation catalogRelation) 
{
-        if (catalogRelation instanceof LogicalOlapScan) {
-            LogicalOlapScan olap = (LogicalOlapScan) catalogRelation;
-            if (olap.getSelectedIndexId() != olap.getTable().getBaseIndexId()) 
{
-                // mv is selected, return its estimated stats
-                Optional<Statistics> optStats = 
cascadesContext.getStatementContext()
-                        .getStatistics(olap.getRelationId());
-                if (optStats.isPresent()) {
-                    double actualRowCount = 
catalogRelation.getTable().getRowCountForNereids();
-                    if (actualRowCount > optStats.get().getRowCount()) {
-                        return optStats.get();
-                    }
-                }
+        StatisticsBuilder builder = new StatisticsBuilder();
+        // for FeUt, use ColumnStatistic.UNKNOWN
+        if (!FeConstants.enableInternalSchemaDb
+                || ConnectContext.get() == null
+                || ConnectContext.get().getSessionVariable().internalSession) {
+            
builder.setRowCount(catalogRelation.getTable().getRowCountForNereids());
+            for (Slot slot : catalogRelation.getOutput()) {
+                builder.putColumnStatistics(slot, ColumnStatistic.UNKNOWN);
             }
+            setHasUnknownColStatsInStatementContext();
+            return builder.build();
         }
 
         List<Slot> output = catalogRelation.getOutput();
@@ -822,121 +1037,17 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
             }
         }
         Set<SlotReference> slotSet = slotSetBuilder.build();
-        Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap = 
new HashMap<>();
-        TableIf table = catalogRelation.getTable();
-        AnalysisManager analysisManager = 
Env.getCurrentEnv().getAnalysisManager();
-        TableStatsMeta tableMeta = 
analysisManager.findTableStatsStatus(table.getId());
-        long tableUpdatedRows = tableMeta == null ? 0 : 
tableMeta.updatedRows.get();
-        boolean hasUnknownKeyCol = false;
-        long idxId = -1;
-        List<String> selectedPartitionNames;
-        if (catalogRelation instanceof OlapScan) {
-            OlapScan olapScan = (OlapScan) catalogRelation;
-            if (olapScan.getTable().getBaseIndexId() != 
olapScan.getSelectedIndexId()) {
-                idxId = olapScan.getSelectedIndexId();
-            }
-            selectedPartitionNames = new 
ArrayList<>(olapScan.getSelectedPartitionIds().size());
-            olapScan.getSelectedPartitionIds().forEach(id -> {
-                
selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName());
-            });
-        } else {
-            selectedPartitionNames = new ArrayList<>();
-        }
-        double rowCount = 0.0;
-        for (SlotReference slotReference : slotSet) {
-            boolean usedAsKey = false;
-            if (ConnectContext.get() != null && 
slotReference.getColumn().isPresent()
-                    && ConnectContext.get().getStatementContext() != null) {
-                usedAsKey = 
ConnectContext.get().getStatementContext().isKeyColumn(slotReference.getColumn().get());
-            }
-            String colName = slotReference.getColumn().isPresent()
-                    ? slotReference.getColumn().get().getName()
-                    : slotReference.getName();
-            boolean shouldIgnoreThisCol = 
StatisticConstants.shouldIgnoreCol(table, slotReference.getColumn().get());
 
-            if (colName == null) {
-                throw new RuntimeException(String.format("Invalid slot: %s", 
slotReference.getExprId()));
-            }
-            // compute delta row
-            long deltaRowCount = 0;
-            if (catalogRelation instanceof OlapScan) {
-                OlapTable olapTable = (OlapTable) table;
-                if (tableMeta != null) {
-                    ColStatsMeta colMeta = tableMeta.findColumnStatsMeta(
-                            olapTable.getIndexNameById(idxId == -1 ? 
olapTable.getBaseIndexId() : idxId), colName);
-                    if (colMeta != null) {
-                        if (((OlapScan) 
catalogRelation).getSelectedPartitionIds().isEmpty()) {
-                            deltaRowCount = tableUpdatedRows - 
colMeta.updatedRows;
-                        } else {
-                            // sum partition delta row
-                            for (long partitionId : ((OlapScan) 
catalogRelation).getSelectedPartitionIds()) {
-                                deltaRowCount += 
tableMeta.partitionUpdateRows.getOrDefault(partitionId, 0L)
-                                        - 
colMeta.partitionUpdateRows.getOrDefault(partitionId, 0L);
-                            }
-                        }
-                    }
-                }
-
-            }
-            ColumnStatistic cache;
-            if (!FeConstants.enableInternalSchemaDb
-                    || shouldIgnoreThisCol) {
-                cache = ColumnStatistic.UNKNOWN;
-            } else {
-                cache = getColumnStatistic(table, colName, idxId, 
selectedPartitionNames);
-            }
+        double rowCount = catalogRelation.getTable().getRowCountForNereids();
+        for (SlotReference slot : slotSet) {
+            ColumnStatistic cache = 
getColumnStatsFromTableCache(catalogRelation, slot);
             ColumnStatisticBuilder colStatsBuilder = new 
ColumnStatisticBuilder(cache);
-            if (cache.avgSizeByte <= 0) {
-                
colStatsBuilder.setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize());
-            }
-            if (!cache.isUnKnown) {
-                rowCount = Math.max(rowCount, cache.count + deltaRowCount);
-            } else {
-                if (usedAsKey) {
-                    hasUnknownKeyCol = true;
-                }
-            }
-            if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().enableStats) {
-                // deltaRowCount > 0 indicates that
-                // new data is loaded to the table after this column was 
analyzed last time.
-                // In this case, need to eliminate min/max value for this 
column.
-                if (deltaRowCount > 0) {
-                    // clear min-max to avoid error estimation
-                    // for example, after yesterday data loaded, user send 
query about yesterday immediately.
-                    // since yesterday data are not analyzed, the max date is 
before yesterday, and hence optimizer
-                    // estimates the filter result is zero
-                    
colStatsBuilder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
-                        
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
-                    if (LOG.isDebugEnabled()) {
-                        LOG.debug("{}.{} is partially analyzed, clear min/max 
values in column stats",
-                                table.getName(), colName);
-                    }
-                }
-                columnStatisticBuilderMap.put(slotReference, colStatsBuilder);
-            } else {
-                columnStatisticBuilderMap.put(slotReference, new 
ColumnStatisticBuilder(ColumnStatistic.UNKNOWN));
-                hasUnknownKeyCol = true;
-            }
-        }
-        if (rowCount <= 0.0) {
-            // if we failed to get rowCount from column stats, then try to get 
it from TableIf
-            rowCount = catalogRelation.getTable().getRowCountForNereids();
-        }
-
-        if (hasUnknownKeyCol && ConnectContext.get() != null && 
ConnectContext.get().getStatementContext() != null) {
-            
ConnectContext.get().getStatementContext().setHasUnknownColStats(true);
-        }
-        return normalizeCatalogRelationColumnStatsRowCount(rowCount, 
columnStatisticBuilderMap);
-    }
-
-    private Statistics normalizeCatalogRelationColumnStatsRowCount(double 
rowCount,
-            Map<Expression, ColumnStatisticBuilder> columnStatisticBuilderMap) 
{
-        Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
-        for (Expression slot : columnStatisticBuilderMap.keySet()) {
-            columnStatisticMap.put(slot,
-                    
columnStatisticBuilderMap.get(slot).setCount(rowCount).build());
+            adjustColStats(catalogRelation, slot, colStatsBuilder);
+            rowCount = Math.max(rowCount, colStatsBuilder.getCount());
+            builder.putColumnStatistics(slot, colStatsBuilder.build());
         }
-        return new Statistics(rowCount, columnStatisticMap);
+        checkIfUnknownStatsUsedAsKey(builder);
+        return builder.build();
     }
 
     private Statistics computeTopN(TopN topN) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/OlapScan.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/OlapScan.java
index d5fe7c23413..65f36394621 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/OlapScan.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/OlapScan.java
@@ -28,6 +28,12 @@ public interface OlapScan {
 
     long getSelectedIndexId();
 
+    /**
+     * if this is mv, return selectedIndexId, o.w -1
+     * @return -1 or selectedIndexId
+     */
+    long getSelectedIndexIdForMV();
+
     List<Long> getSelectedPartitionIds();
 
     List<Long> getSelectedTabletIds();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalDeferMaterializeOlapScan.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalDeferMaterializeOlapScan.java
index 77da6537dd7..bff416c19d2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalDeferMaterializeOlapScan.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalDeferMaterializeOlapScan.java
@@ -93,6 +93,11 @@ public class LogicalDeferMaterializeOlapScan extends 
LogicalCatalogRelation impl
         return logicalOlapScan.getSelectedIndexId();
     }
 
+    @Override
+    public long getSelectedIndexIdForMV() {
+        return logicalOlapScan.getSelectedIndexIdForMV();
+    }
+
     @Override
     public List<Long> getSelectedPartitionIds() {
         return logicalOlapScan.getSelectedPartitionIds();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
index 65e4710836d..f14cd661d95 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
@@ -333,6 +333,14 @@ public class LogicalOlapScan extends 
LogicalCatalogRelation implements OlapScan
         return selectedIndexId;
     }
 
+    @Override
+    public long getSelectedIndexIdForMV() {
+        if (getTable().getBaseIndexId() != selectedIndexId) {
+            return selectedIndexId;
+        }
+        return -1;
+    }
+
     public boolean isIndexSelected() {
         return indexSelected;
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java
index f82bd6dbec5..1acfb8dc4c0 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalDeferMaterializeOlapScan.java
@@ -90,6 +90,11 @@ public class PhysicalDeferMaterializeOlapScan extends 
PhysicalCatalogRelation im
         return physicalOlapScan.getSelectedIndexId();
     }
 
+    @Override
+    public long getSelectedIndexIdForMV() {
+        return physicalOlapScan.getSelectedIndexIdForMV();
+    }
+
     @Override
     public List<Long> getSelectedPartitionIds() {
         return physicalOlapScan.getSelectedPartitionIds();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java
index 76713a51e29..1839f46c552 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java
@@ -90,6 +90,14 @@ public class PhysicalOlapScan extends 
PhysicalCatalogRelation implements OlapSca
         return selectedIndexId;
     }
 
+    @Override
+    public long getSelectedIndexIdForMV() {
+        if (getTable().getBaseIndexId() != selectedIndexId) {
+            return selectedIndexId;
+        }
+        return -1;
+    }
+
     @Override
     public List<Long> getSelectedTabletIds() {
         return selectedTabletIds;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java
index 53d8f49cb14..29f04f2926e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsBuilder.java
@@ -21,11 +21,12 @@ import 
org.apache.doris.nereids.trees.expressions.Expression;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Set;
 
 public class StatisticsBuilder {
 
     private double rowCount;
-    private int widthInJoinCluster;
+    private int widthInJoinCluster = 1;
     private final Map<Expression, ColumnStatistic> expressionToColumnStats;
 
     public StatisticsBuilder() {
@@ -60,6 +61,10 @@ public class StatisticsBuilder {
         return this;
     }
 
+    public Set<Map.Entry<Expression, ColumnStatistic>> 
getExpressionColumnStatsEntries() {
+        return expressionToColumnStats.entrySet();
+    }
+
     public Statistics build() {
         return new Statistics(rowCount, widthInJoinCluster, 
expressionToColumnStats);
     }
diff --git a/regression-test/suites/nereids_p0/stats/partition_col_stats.groovy 
b/regression-test/suites/nereids_p0/stats/partition_col_stats.groovy
index 89a32a80d91..3436b6dd86e 100644
--- a/regression-test/suites/nereids_p0/stats/partition_col_stats.groovy
+++ b/regression-test/suites/nereids_p0/stats/partition_col_stats.groovy
@@ -36,7 +36,7 @@ suite("partition_col_stats") {
     """
     //run this sql to make stats be cached
     sql "select * from pt where k1<3;"
-    sleep(10)
+    sleep(10000)
     explain{
         sql "physical plan select * from pt where k1<3;"
         contains("stats=4")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to