This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8debc96d74 [enhancement](nereids) update FilterEstimation and Agg in stats derive (#17790)
8debc96d74 is described below

commit 8debc96d74b2a5c2c4d24d47f8df47554096333d
Author: minghong <engle...@gmail.com>
AuthorDate: Fri Mar 17 18:01:50 2023 +0800

    [enhancement](nereids) update FilterEstimation and Agg in stats derive (#17790)
    
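    * 1. update the ndv computation in Statistics,
    2. skip predicates on __DORIS_DELETE_SIGN__ (e.g. __DORIS_DELETE_SIGN__=0) in stats derive,
    3. for equalTo in stats derive, set the column's min/max to the compared value,
    4. update agg stats derive to support the case where all group-by column stats are unknown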
    
    * computeSize
    
    * fix ut
---
 .../apache/doris/nereids/memo/GroupExpression.java | 16 ++---
 .../doris/nereids/stats/FilterEstimation.java      | 69 +++++++++++++---------
 .../apache/doris/nereids/stats/JoinEstimation.java |  6 ++
 .../doris/nereids/stats/StatsCalculator.java       | 35 ++++++++---
 .../plans/physical/PhysicalHashAggregate.java      |  2 +-
 .../plans/physical/PhysicalNestedLoopJoin.java     |  3 +-
 .../org/apache/doris/statistics/Statistics.java    | 31 +++++++---
 .../org/apache/doris/nereids/memo/RankTest.java    |  3 +-
 .../doris/nereids/stats/FilterEstimationTest.java  | 37 ++++++------
 .../doris/nereids/stats/StatsCalculatorTest.java   | 12 ++--
 .../suites/nereids_syntax_p0/join.groovy           | 27 ---------
 11 files changed, 132 insertions(+), 109 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java
index 5b4774284a..12e9816f8e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java
@@ -57,7 +57,7 @@ public class GroupExpression {
     private final BitSet ruleMasks;
     private boolean statDerived;
 
-    private long estOutputRowCount = -1;
+    private double estOutputRowCount = -1;
 
     //Record the rule that generate this plan. It's used for debugging
     private Rule fromRule;
@@ -301,16 +301,13 @@ public class GroupExpression {
         return new Statistics(child(idx).getStatistics());
     }
 
-    public void setEstOutputRowCount(long estOutputRowCount) {
+    public void setEstOutputRowCount(double estOutputRowCount) {
         this.estOutputRowCount = estOutputRowCount;
     }
 
-    public long getEstOutputRowCount() {
-        return estOutputRowCount;
-    }
-
     @Override
     public String toString() {
+        DecimalFormat format = new DecimalFormat("#,###.##");
         StringBuilder builder = new StringBuilder("id:");
         builder.append(id.asInt());
         if (ownerGroup == null) {
@@ -318,11 +315,8 @@ public class GroupExpression {
         } else {
             builder.append("#").append(ownerGroup.getGroupId().asInt());
         }
-
-        DecimalFormat decimalFormat = new DecimalFormat();
-        decimalFormat.setGroupingSize(3);
-        builder.append(" cost=").append(decimalFormat.format((long) cost));
-        builder.append(" estRows=").append(estOutputRowCount);
+        builder.append(" cost=").append(format.format((long) cost));
+        builder.append(" estRows=").append(format.format(estOutputRowCount));
         builder.append(" (plan=").append(plan.toString()).append(") 
children=[");
         for (Group group : children) {
             builder.append(group.getGroupId()).append(" ");
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 2aa55114d8..e2159f1040 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -31,10 +31,10 @@ import 
org.apache.doris.nereids.trees.expressions.LessThanEqual;
 import org.apache.doris.nereids.trees.expressions.Not;
 import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
 import org.apache.doris.nereids.trees.expressions.Or;
+import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
-import org.apache.doris.nereids.types.coercion.NumericType;
 import org.apache.doris.statistics.Bucket;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.ColumnStatisticBuilder;
@@ -48,6 +48,7 @@ import com.google.common.base.Preconditions;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 /**
  * Calculate selectivity of expression that produces boolean value.
@@ -85,10 +86,10 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
             Statistics rightStats = rightExpr.accept(this, context);
             double rowCount = leftStats.getRowCount() + 
rightStats.getRowCount() - andStats.getRowCount();
             Statistics orStats = context.statistics.withRowCount(rowCount);
-            for (Map.Entry<Expression, ColumnStatistic> entry : 
leftStats.columnStatistics().entrySet()) {
-                ColumnStatistic leftColStats = entry.getValue();
+            for (Map.Entry<Expression, ColumnStatistic> entry : 
orStats.columnStatistics().entrySet()) {
+                ColumnStatistic leftColStats = 
leftStats.findColumnStatistics(entry.getKey());
                 ColumnStatistic rightColStats = 
rightStats.findColumnStatistics(entry.getKey());
-                ColumnStatisticBuilder estimatedColStatsBuilder = new 
ColumnStatisticBuilder(leftColStats);
+                ColumnStatisticBuilder estimatedColStatsBuilder = new 
ColumnStatisticBuilder(entry.getValue());
                 if (leftColStats.minValue <= rightColStats.minValue) {
                     
estimatedColStatsBuilder.setMinValue(leftColStats.minValue);
                     estimatedColStatsBuilder.setMinExpr(leftColStats.minExpr);
@@ -113,7 +114,17 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
     @Override
     public Statistics visitComparisonPredicate(ComparisonPredicate cp, 
EstimationContext context) {
         Expression left = cp.left();
+        if (left instanceof SlotReference && ((SlotReference) 
left).getColumn().isPresent()) {
+            if ("__DORIS_DELETE_SIGN__".equals(((SlotReference) 
left).getColumn().get().getName())) {
+                return context.statistics;
+            }
+        }
         Expression right = cp.right();
+        if (right instanceof SlotReference && ((SlotReference) 
right).getColumn().isPresent()) {
+            if ("__DORIS_DELETE_SIGN__".equals(((SlotReference) 
right).getColumn().get().getName())) {
+                return context.statistics;
+            }
+        }
         ColumnStatistic statsForLeft = ExpressionEstimation.estimate(left, 
context.statistics);
         ColumnStatistic statsForRight = ExpressionEstimation.estimate(right, 
context.statistics);
         if (!(left instanceof Literal) && !(right instanceof Literal)) {
@@ -152,10 +163,6 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
         if (statsForLeft == ColumnStatistic.UNKNOWN) {
             return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
         }
-        Expression rightExpr = cp.child(1);
-        if (!(rightExpr.getDataType() instanceof NumericType)) {
-            return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT);
-        }
         double selectivity;
         double ndv = statsForLeft.ndv;
         double val = statsForRight.maxValue;
@@ -175,7 +182,33 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
             if (statsForLeft.histogram != null) {
                 return estimateEqualToWithHistogram(cp.left(), statsForLeft, 
val, context);
             }
-            return context.statistics.withSel(selectivity);
+            // cp.left : func(A), we assume func(A) has same statistics with A
+            // for example: cast(N_NAME as varchar(*)) = 'GERMANY',
+            // we assume cast(N_NAME as varchar(*)) and N_NAME have the same 
col stats
+            Set<Slot> leftSlots = cp.left().getInputSlots();
+            Preconditions.checkArgument(leftSlots.size() <= 1,
+                    "stats derive: equal condition only support at one column, 
but we meet "
+                            + leftSlots.size()
+            );
+
+            Statistics equalStats = context.statistics.withSel(selectivity);
+            /*
+            leftSlots could be empty, for example:
+            select * from (select 'jj' as kk1, sum(k2) from ${tableName2} 
where k10 = '2015-04-02' group by kk1)tt
+            where kk1 in ('jj')
+            kk1 in ('jj') => kk1 = 'jj' => 'jj'='jj
+            TODO const fold could eliminate this equalTo.
+             */
+            if (!leftSlots.isEmpty()) {
+                Slot leftSlot = leftSlots.iterator().next();
+                //update min/max of cp.left
+                ColumnStatistic columnStats = 
equalStats.findColumnStatistics(leftSlot);
+                ColumnStatisticBuilder colStatsBuilder = new 
ColumnStatisticBuilder(columnStats);
+                colStatsBuilder.setMaxValue(val);
+                colStatsBuilder.setMinValue(val);
+                equalStats.addColumnStats(leftSlot, colStatsBuilder.build());
+            }
+            return equalStats;
         } else {
             if (cp instanceof LessThan || cp instanceof LessThanEqual) {
                 if (context.isNot) {
@@ -238,7 +271,6 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
               A.selectivity = 7/10
         */
         double validInOptCount = 0;
-        double columnSelectivity = 1.0;
         double selectivity = 1.0;
         ColumnStatisticBuilder compareExprStatsBuilder = new 
ColumnStatisticBuilder(compareExprStats);
         if (isNotIn) {
@@ -250,7 +282,6 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
                 }
             }
             validInOptCount = Math.max(1, compareExprStats.ndv - 
validInOptCount);
-            columnSelectivity = compareExprStats.ndv == 0 ? 0 : Math.max(1, 
validInOptCount) / compareExprStats.ndv;
         } else {
             for (Expression option : options) {
                 ColumnStatistic optionStats = 
ExpressionEstimation.estimate(option, context.statistics);
@@ -263,29 +294,13 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
             }
             maxOption = Math.min(maxOption, compareExprStats.maxValue);
             minOption = Math.max(minOption, compareExprStats.minValue);
-            if (maxOption == minOption) {
-                columnSelectivity = 1.0;
-            } else {
-                double outputRange = maxOption - minOption;
-                double originRange = Math.max(1, compareExprStats.maxValue - 
compareExprStats.minValue);
-                double orginDensity = StatsMathUtil.minNonNaN(1,
-                        compareExprStats.ndv / 
StatsMathUtil.nonZeroDivisor(originRange));
-                double outputDensity = StatsMathUtil.minNonNaN(1,
-                        validInOptCount / 
StatsMathUtil.nonZeroDivisor(outputRange));
-                columnSelectivity = StatsMathUtil.minNonNaN(1, outputDensity
-                        / StatsMathUtil.nonZeroDivisor(orginDensity));
-            }
             compareExprStatsBuilder.setMaxValue(maxOption);
             compareExprStatsBuilder.setMinValue(minOption);
         }
 
         selectivity = StatsMathUtil.minNonNaN(1.0, validInOptCount / 
compareExprStats.ndv);
-
-        compareExprStatsBuilder.setSelectivity(compareExprStats.selectivity * 
columnSelectivity);
         compareExprStatsBuilder.setNdv(validInOptCount);
-
         Statistics estimated = new Statistics(context.statistics);
-
         estimated = estimated.withSel(selectivity);
         if (compareExpr instanceof SlotReference) {
             estimated.addColumnStats(compareExpr,
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
index c1e30f1da7..e77c060bba 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
@@ -21,6 +21,7 @@ import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.plans.JoinType;
 import org.apache.doris.nereids.trees.plans.algebra.Join;
+import org.apache.doris.nereids.util.ExpressionUtils;
 import org.apache.doris.statistics.Statistics;
 import org.apache.doris.statistics.StatisticsBuilder;
 
@@ -70,6 +71,11 @@ public class JoinEstimation {
                 .build();
         List<Expression> joinConditions = join.getHashJoinConjuncts();
         Statistics innerJoinStats = estimateInnerJoin(crossJoinStats, 
joinConditions);
+        if (!join.getOtherJoinConjuncts().isEmpty()) {
+            FilterEstimation filterEstimation = new FilterEstimation();
+            innerJoinStats = filterEstimation.estimate(
+                    ExpressionUtils.and(join.getOtherJoinConjuncts()), 
innerJoinStats);
+        }
         innerJoinStats.setWidth(leftStats.getWidth() + rightStats.getWidth());
         innerJoinStats.setPenalty(0);
         double rowCount;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 5e196a4441..8f10d6f4ea 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -106,6 +106,7 @@ import java.util.stream.Collectors;
  * Used to calculate the stats for each plan
  */
 public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
+    public static double DEFAULT_AGGREGATE_RATIO = 0.5;
     private final GroupExpression groupExpression;
 
     private StatsCalculator(GroupExpression groupExpression) {
@@ -130,7 +131,7 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         if (originStats == null || originStats.getRowCount() > 
stats.getRowCount()) {
             groupExpression.getOwnerGroup().setStatistics(stats);
         }
-        groupExpression.setEstOutputRowCount((long) stats.getRowCount());
+        groupExpression.setEstOutputRowCount(stats.getRowCount());
         groupExpression.setStatDerived(true);
     }
 
@@ -436,12 +437,32 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         // TODO: since we have no column stats here. just use a fix ratio to 
compute the row count.
         List<Expression> groupByExpressions = 
aggregate.getGroupByExpressions();
         Statistics childStats = groupExpression.childStatistics(0);
-        Map<Expression, ColumnStatistic> childSlotToColumnStats = 
childStats.columnStatistics();
-        double resultSetCount = groupByExpressions.stream().flatMap(expr -> 
expr.getInputSlots().stream())
-                
.filter(childSlotToColumnStats::containsKey).map(childSlotToColumnStats::get).map(s
 -> s.ndv)
-                .reduce(1d, (a, b) -> a * b);
-        if (resultSetCount <= 0) {
-            resultSetCount = 1L;
+        double resultSetCount = 1;
+        if (!groupByExpressions.isEmpty()) {
+            Map<Expression, ColumnStatistic> childSlotToColumnStats = 
childStats.columnStatistics();
+            double inputRowCount = childStats.getRowCount();
+            if (inputRowCount == 0) {
+                //on empty relation, Agg output 1 tuple
+                resultSetCount = 1;
+            } else {
+                List<ColumnStatistic> groupByKeyStats = 
groupByExpressions.stream()
+                        .flatMap(expr -> expr.getInputSlots().stream())
+                        .map(Slot::getExprId)
+                        .filter(childSlotToColumnStats::containsKey)
+                        .map(childSlotToColumnStats::get)
+                        .filter(s -> !s.isUnKnown)
+                        .collect(Collectors.toList());
+                if (groupByKeyStats.isEmpty()) {
+                    //all column stats are unknown, use default ratio
+                    resultSetCount = inputRowCount * DEFAULT_AGGREGATE_RATIO;
+                } else {
+                    resultSetCount = groupByKeyStats.stream()
+                            .map(s -> s.ndv)
+                            .reduce(1.0, (a, b) -> a * b);
+                    //agg output tuples should be less than input tuples
+                    resultSetCount = Math.min(resultSetCount, inputRowCount);
+                }
+            }
         }
         resultSetCount = Math.min(resultSetCount, childStats.getRowCount());
         Map<Expression, ColumnStatistic> slotToColumnStats = Maps.newHashMap();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
index 3da6bf2d9d..1d8cdbf71f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
@@ -257,7 +257,7 @@ public class PhysicalHashAggregate<CHILD_TYPE extends Plan> 
extends PhysicalUnar
     @Override
     public PhysicalHashAggregate<CHILD_TYPE> 
withAggOutput(List<NamedExpression> newOutput) {
         return new PhysicalHashAggregate<>(groupByExpressions, newOutput, 
partitionExpressions,
-                aggregateParam, maybeUsingStream, groupExpression, 
getLogicalProperties(),
+                aggregateParam, maybeUsingStream, Optional.empty(), 
getLogicalProperties(),
                 requireProperties, physicalProperties, statistics, child());
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
index c6e263c99a..29e92b5038 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
@@ -119,7 +119,8 @@ public class PhysicalNestedLoopJoin<
                 "type", joinType,
                 "otherJoinCondition", otherJoinConjuncts,
                 "isMarkJoin", markJoinSlotReference.isPresent(),
-                "markJoinSlotReference", markJoinSlotReference.isPresent() ? 
markJoinSlotReference.get() : "empty"
+                "markJoinSlotReference", markJoinSlotReference.isPresent() ? 
markJoinSlotReference.get() : "empty",
+                "stats", statistics
         );
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index 048f342d89..b9cf6040e8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -20,12 +20,12 @@ package org.apache.doris.statistics;
 import org.apache.doris.nereids.stats.StatsMathUtil;
 import org.apache.doris.nereids.trees.expressions.Expression;
 
+import java.text.DecimalFormat;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Map.Entry;
 
 public class Statistics {
-
     private final double rowCount;
 
     private final Map<Expression, ColumnStatistic> expressionToColumnStats;
@@ -38,6 +38,19 @@ public class Statistics {
     @Deprecated
     private double penalty;
 
+    /**
+     * after filter, compute the new ndv of a column
+     * @param ndv original ndv of column
+     * @param newRowCount the row count of table after filter
+     * @param oldRowCount the row count of table before filter
+     * @return the new ndv after filter
+     */
+    public static double computeNdv(double ndv, double newRowCount, double 
oldRowCount) {
+        double selectOneTuple = newRowCount / 
StatsMathUtil.nonZeroDivisor(oldRowCount);
+        double allTuplesOfSameDistinctValueNotSelected = Math.pow((1 - 
selectOneTuple), oldRowCount / ndv);
+        return Math.min(ndv * (1 - allTuplesOfSameDistinctValueNotSelected), 
newRowCount);
+    }
+
     public Statistics(Statistics another) {
         this.rowCount = another.rowCount;
         this.expressionToColumnStats = new 
HashMap<>(another.expressionToColumnStats);
@@ -72,17 +85,18 @@ public class Statistics {
 
     public Statistics withRowCount(double rowCount) {
         Statistics statistics = new Statistics(rowCount, new 
HashMap<>(expressionToColumnStats), width, penalty);
-        statistics.fix(rowCount / StatsMathUtil.nonZeroDivisor(this.rowCount));
+        statistics.fix(rowCount, StatsMathUtil.nonZeroDivisor(this.rowCount));
         return statistics;
     }
 
-    public void fix(double sel) {
+    public void fix(double newRowCount, double originRowCount) {
+        double sel = newRowCount / originRowCount;
         for (Entry<Expression, ColumnStatistic> entry : 
expressionToColumnStats.entrySet()) {
             ColumnStatistic columnStatistic = entry.getValue();
             ColumnStatisticBuilder columnStatisticBuilder = new 
ColumnStatisticBuilder(columnStatistic);
-            
columnStatisticBuilder.setNdv(Math.min(Math.ceil(columnStatistic.ndv * sel), 
rowCount));
-            
columnStatisticBuilder.setNumNulls(Math.min(Math.ceil(columnStatistic.numNulls 
* sel), rowCount));
-            
columnStatisticBuilder.setCount(Math.min(Math.ceil(columnStatistic.count * 
sel), rowCount));
+            columnStatisticBuilder.setNdv(computeNdv(columnStatistic.ndv, 
newRowCount, originRowCount));
+            
columnStatisticBuilder.setNumNulls(Math.min(columnStatistic.numNulls * sel, 
rowCount));
+            columnStatisticBuilder.setCount(newRowCount);
             expressionToColumnStats.put(entry.getKey(), 
columnStatisticBuilder.build());
         }
     }
@@ -105,7 +119,7 @@ public class Statistics {
     public double computeSize() {
         if (computeSize <= 0) {
             computeSize = Math.max(1, expressionToColumnStats.values().stream()
-                    .map(s -> s.dataSize).reduce(0D, Double::sum)
+                    .map(s -> s.avgSizeByte).reduce(0D, Double::sum)
             ) * rowCount;
         }
         return computeSize;
@@ -113,7 +127,8 @@ public class Statistics {
 
     @Override
     public String toString() {
-        return String.format("rows=%.4f", rowCount);
+        DecimalFormat format = new DecimalFormat("#,###.##");
+        return format.format(rowCount);
     }
 
     public void setWidth(double width) {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java
index 70fe91afc9..977ba7beb2 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java
@@ -55,12 +55,13 @@ public class RankTest extends TPCHTestBase {
                     .optimize()
                     .getCascadesContext()
                     .getMemo();
-            PhysicalPlan plan1 = memo.unrank(memo.rank(1).first);
             PhysicalPlan plan2 = PlanChecker.from(connectContext)
                     .analyze(field.get(null).toString())
                     .rewrite()
                     .optimize()
                     .getBestPlanTree(PhysicalProperties.GATHER);
+            PhysicalPlan plan1 = memo.unrank(memo.rank(1).first);
+
             Assertions.assertTrue(PlanChecker.isPlanEqualWithoutID(plan1, 
plan2));
         }
     }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index 3992e2de9a..691cf53720 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -197,8 +197,7 @@ class FilterEstimationTest {
         Statistics stat = new Statistics(1000, slotToColumnStat);
         FilterEstimation filterEstimation = new FilterEstimation();
         Statistics expected = filterEstimation.estimate(or, stat);
-        Assertions.assertTrue(
-                Precision.equals(50, expected.getRowCount(), 0.01));
+        Assertions.assertEquals(51, expected.getRowCount(), 0.1);
     }
 
     // a > 500 and b < 100 or a > c
@@ -450,9 +449,9 @@ class FilterEstimationTest {
         Assertions.assertEquals(1000 * 7.0 / 10.0, estimated.getRowCount());
     }
 
-    //c>100
+    // c>100
     // a is primary-key, a.ndv is reduced
-    // b is normal, b.ndv is not changed
+    // b is normal, b.ndv is smaller: newNdv = ndv * (1 - Math.pow(1 - 
selectivity, rowCount / ndv));
     // c.selectivity is still 1, but its range becomes half
     @Test
     public void test12() {
@@ -466,8 +465,8 @@ class FilterEstimationTest {
                 .setNdv(1000)
                 .setAvgSizeByte(4)
                 .setNumNulls(0)
-                .setMinValue(10000)
-                .setMaxValue(1000)
+                .setMinValue(1000)
+                .setMaxValue(10000)
                 .setSelectivity(1.0);
         ColumnStatisticBuilder builderB = new ColumnStatisticBuilder()
                 .setNdv(100)
@@ -492,7 +491,7 @@ class FilterEstimationTest {
         ColumnStatistic statsA = estimated.findColumnStatistics(a);
         Assertions.assertEquals(500, statsA.ndv);
         ColumnStatistic statsB = estimated.findColumnStatistics(b);
-        Assertions.assertEquals(50, statsB.ndv);
+        Assertions.assertEquals(100, statsB.ndv, 0.1);
         ColumnStatistic statsC = estimated.findColumnStatistics(c);
         Assertions.assertEquals(50, statsC.ndv);
         Assertions.assertEquals(100, statsC.minValue);
@@ -502,9 +501,10 @@ class FilterEstimationTest {
     /**
      * test filter estimation, like 20>c>10, c in (0,40)
      * filter range has intersection with (c.min, c.max)
-     *     a primary key, a.ndv reduced by 1/4, a.selectivity=0.25
-     *     b normal field, b.ndv not changed, b.selectivity=1.0
-     *     c.ndv = 10/40 * c.ndv, c.selectivity=1
+     *      rows = 100
+     *     a primary key, a.ndv reduced by 1/4
+     *     b normal field, b.ndv=20 =>
+     *     c.ndv = 10/40 * c.ndv
      */
     @Test
     public void testFilterInsideMinMax() {
@@ -547,13 +547,13 @@ class FilterEstimationTest {
         Statistics estimated = filterEstimation.estimate(and, stat);
         Assertions.assertEquals(25, estimated.getRowCount());
         ColumnStatistic statsA = estimated.findColumnStatistics(a);
-        Assertions.assertEquals(25, statsA.ndv);
+        Assertions.assertEquals(25, statsA.ndv, 0.1);
         //Assertions.assertEquals(0.25, statsA.selectivity);
         Assertions.assertEquals(0, statsA.minValue);
         Assertions.assertEquals(100, statsA.maxValue);
 
         ColumnStatistic statsB = estimated.findColumnStatistics(b);
-        Assertions.assertEquals(5, statsB.ndv);
+        Assertions.assertEquals(15.6, statsB.ndv, 0.1);
         Assertions.assertEquals(0, statsB.minValue);
         Assertions.assertEquals(500, statsB.maxValue);
         Assertions.assertEquals(1.0, statsB.selectivity);
@@ -686,10 +686,10 @@ class FilterEstimationTest {
         ColumnStatistic statsA = estimated.findColumnStatistics(a);
         ColumnStatistic statsB = estimated.findColumnStatistics(b);
         ColumnStatistic statsC = estimated.findColumnStatistics(c);
-        Assertions.assertEquals(5, statsA.ndv);
+        Assertions.assertEquals(5, statsA.ndv, 0.1);
         Assertions.assertEquals(0, statsA.minValue);
         Assertions.assertEquals(100, statsA.maxValue);
-        Assertions.assertEquals(1, statsB.ndv);
+        Assertions.assertEquals(4.5, statsB.ndv, 0.1);
         Assertions.assertEquals(0, statsB.minValue);
         Assertions.assertEquals(500, statsB.maxValue);
         Assertions.assertEquals(2, statsC.ndv);
@@ -763,10 +763,10 @@ class FilterEstimationTest {
         System.out.println(statsA);
         System.out.println(statsB);
         System.out.println(statsC);
-        Assertions.assertEquals(5, statsA.ndv);
+        Assertions.assertEquals(5, statsA.ndv, 0.1);
         Assertions.assertEquals(0, statsA.minValue);
         Assertions.assertEquals(100, statsA.maxValue);
-        Assertions.assertEquals(1, statsB.ndv);
+        Assertions.assertEquals(4.5, statsB.ndv, 0.1);
         Assertions.assertEquals(0, statsB.minValue);
         Assertions.assertEquals(500, statsB.maxValue);
         Assertions.assertEquals(2, statsC.ndv);
@@ -832,13 +832,10 @@ class FilterEstimationTest {
         ColumnStatistic statsA = estimated.findColumnStatistics(a);
         ColumnStatistic statsB = estimated.findColumnStatistics(b);
         ColumnStatistic statsC = estimated.findColumnStatistics(c);
-        System.out.println(statsA);
-        System.out.println(statsB);
-        System.out.println(statsC);
         Assertions.assertEquals(75, statsA.ndv);
         Assertions.assertEquals(0, statsA.minValue);
         Assertions.assertEquals(100, statsA.maxValue);
-        Assertions.assertEquals(15, statsB.ndv);
+        Assertions.assertEquals(19.9, statsB.ndv, 0.1);
         Assertions.assertEquals(0, statsB.minValue);
         Assertions.assertEquals(500, statsB.maxValue);
         Assertions.assertEquals(30, statsC.ndv);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
index ddccd7eddc..14501cdd91 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java
@@ -145,14 +145,14 @@ public class StatsCalculatorTest {
         Group ownerGroup = newGroup();
         groupExpression.setOwnerGroup(ownerGroup);
         StatsCalculator.estimate(groupExpression);
-        Assertions.assertEquals((long) 500, 
ownerGroup.getStatistics().getRowCount(), 0.001);
+        Assertions.assertEquals((10000 * 0.1 * 0.05), 
ownerGroup.getStatistics().getRowCount(), 0.001);
 
         LogicalFilter<GroupPlan> logicalFilterOr = new LogicalFilter<>(or, 
groupPlan);
         GroupExpression groupExpressionOr = new 
GroupExpression(logicalFilterOr, ImmutableList.of(childGroup));
         Group ownerGroupOr = newGroup();
         groupExpressionOr.setOwnerGroup(ownerGroupOr);
         StatsCalculator.estimate(groupExpressionOr);
-        Assertions.assertEquals((long) 1000,
+        Assertions.assertEquals((long) (10000 * (0.1 + 0.05 - 0.1 * 0.05)),
                 ownerGroupOr.getStatistics().getRowCount(), 0.001);
     }
 
@@ -292,8 +292,8 @@ public class StatsCalculatorTest {
         Statistics limitStats = ownerGroup.getStatistics();
         Assertions.assertEquals(1, limitStats.getRowCount());
         ColumnStatistic slot1Stats = limitStats.columnStatistics().get(slot1);
-        Assertions.assertEquals(1, slot1Stats.ndv);
-        Assertions.assertEquals(1, slot1Stats.numNulls);
+        Assertions.assertEquals(1, slot1Stats.ndv, 0.1);
+        Assertions.assertEquals(0.5, slot1Stats.numNulls);
     }
 
     @Test
@@ -322,7 +322,7 @@ public class StatsCalculatorTest {
         Statistics topNStats = ownerGroup.getStatistics();
         Assertions.assertEquals(1, topNStats.getRowCount());
         ColumnStatistic slot1Stats = topNStats.columnStatistics().get(slot1);
-        Assertions.assertEquals(1, slot1Stats.ndv);
-        Assertions.assertEquals(1, slot1Stats.numNulls);
+        Assertions.assertEquals(1, slot1Stats.ndv, 0.1);
+        Assertions.assertEquals(0.5, slot1Stats.numNulls);
     }
 }
diff --git a/regression-test/suites/nereids_syntax_p0/join.groovy 
b/regression-test/suites/nereids_syntax_p0/join.groovy
index 982c4d8316..9bda506a3d 100644
--- a/regression-test/suites/nereids_syntax_p0/join.groovy
+++ b/regression-test/suites/nereids_syntax_p0/join.groovy
@@ -204,33 +204,6 @@ suite("join") {
         insert into outerjoin_D values( 1 );
     """
 
-    def explainStr =
-        sql(""" explain SELECT count(1)
-                FROM 
-                    (SELECT sub1.wtid,
-                        count(*)
-                    FROM 
-                        (SELECT a.wtid ,
-                        a.wfid
-                        FROM test_table_b a ) sub1
-                        INNER JOIN [shuffle] 
-                            (SELECT a.wtid,
-                        a.wfid
-                            FROM test_table_a a ) sub2
-                                ON sub1.wtid = sub2.wtid
-                                    AND sub1.wfid = sub2.wfid
-                            GROUP BY  sub1.wtid ) qqqq;""").toString()
-    logger.info(explainStr)
-    assertTrue(
-        //if analyze finished
-            explainStr.contains("VAGGREGATE (update serialize)") && 
explainStr.contains("VAGGREGATE (merge finalize)")
-                    && explainStr.contains("wtid[#8] = wtid[#3]") && 
explainStr.contains("projections: wtid[#5], wfid[#6]")
-                    ||
-        //analyze not finished
-                    explainStr.contains("VAGGREGATE (update finalize)") && 
explainStr.contains("VAGGREGATE (update finalize)")
-                    && explainStr.contains("VEXCHANGE") && 
explainStr.contains("VHASH JOIN")
-    )
-
     test {
         sql"""select * from test_table_a a cross join test_table_b b on a.wtid 
> b.wtid"""
         check{result, exception, startTime, endTime ->


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to