This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6f6054d37eb [fix](nereids)filter estimation for slot=unknown (#39592)
6f6054d37eb is described below

commit 6f6054d37eb7f7d3123b69c96761e6610325a215
Author: minghong <engle...@gmail.com>
AuthorDate: Mon Aug 26 11:39:45 2024 +0800

    [fix](nereids)filter estimation for slot=unknown (#39592)
    
    ## Proposed changes
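    Detect a new pattern: slot = unknown, i.e. an equality predicate whose
    right-hand side has unknown column statistics.
    For example, suppose slot.ndv = 5 and the slot's row count is 100.
    The expected filter result is 100 / 5 = 20 rows, but on master the result is 0.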
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 .../doris/nereids/stats/FilterEstimation.java      | 26 ++++++--
 .../doris/nereids/stats/FilterEstimationTest.java  | 75 ++++++++++++++++++++++
 2 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index d44e6198170..5569580d29b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -331,14 +331,28 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
             ColumnStatistic statsForRight,
             EstimationContext context) {
         double selectivity;
-        double ndv = statsForLeft.ndv;
-        double val = statsForRight.maxValue;
-        if (val > statsForLeft.maxValue || val < statsForLeft.minValue) {
-            selectivity = 0.0;
+        if (statsForLeft.isUnKnown) {
+            selectivity = DEFAULT_INEQUALITY_COEFFICIENT;
         } else {
-            selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv);
+            double ndv = statsForLeft.ndv;
+            if (statsForRight.isUnKnown) {
+                if (ndv >= 1.0) {
+                    selectivity = 1.0 / ndv;
+                } else {
+                    selectivity = DEFAULT_INEQUALITY_COEFFICIENT;
+                }
+            } else {
+                double val = statsForRight.maxValue;
+                if (val > statsForLeft.maxValue || val < 
statsForLeft.minValue) {
+                    selectivity = 0.0;
+                } else if (ndv >= 1.0) {
+                    selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv);
+                } else {
+                    selectivity = DEFAULT_INEQUALITY_COEFFICIENT;
+                }
+                selectivity = getNotNullSelectivity(statsForLeft, selectivity);
+            }
         }
-        selectivity = getNotNullSelectivity(statsForLeft, selectivity);
         Statistics equalStats = context.statistics.withSel(selectivity);
         Expression left = cp.left();
         equalStats.addColumnStats(left, statsForRight);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
index dd5a38a4a62..d7c44e082cf 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java
@@ -33,6 +33,7 @@ import org.apache.doris.nereids.trees.expressions.Not;
 import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
 import org.apache.doris.nereids.trees.expressions.Or;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Left;
 import org.apache.doris.nereids.trees.expressions.literal.DateLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
@@ -177,6 +178,80 @@ class FilterEstimationTest {
         Assertions.assertEquals(10, aStatsEst.ndv);
     }
 
+    @Test
+    public void knownEqualToUnknown() {
+        SlotReference ym = new SlotReference("a", new VarcharType(7));
+        double rowCount = 404962.0;
+        double ndv = 14.0;
+        ColumnStatistic ymStats = new ColumnStatisticBuilder()
+                .setCount(rowCount)
+                .setNdv(ndv)
+                .setMinExpr(new StringLiteral("2023-07"))
+                .setMinValue(14126741000630328.000000)
+                .setMaxExpr(new StringLiteral("2024-08"))
+                .setMaxValue(14126741017407544.000000)
+                .setAvgSizeByte(7)
+                .build();
+        Statistics stats = new StatisticsBuilder()
+                .setRowCount(404962).putColumnStatistics(ym, ymStats)
+                .build();
+
+        EqualTo predicate = new EqualTo(ym,
+                new Left(new 
org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"),
+                        new IntegerLiteral(7))
+        );
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics outStats = filterEstimation.estimate(predicate, stats);
+        Assertions.assertEquals(rowCount / ndv, outStats.getRowCount());
+    }
+
+    @Test
+    public void knownEqualToUnknownWithLittleNdv() {
+        SlotReference ym = new SlotReference("a", new VarcharType(7));
+        double rowCount = 404962.0;
+        double ndv = 0.5;
+        ColumnStatistic ymStats = new ColumnStatisticBuilder()
+                .setCount(rowCount)
+                .setNdv(ndv)
+                .setMinExpr(new StringLiteral("2023-07"))
+                .setMinValue(14126741000630328.000000)
+                .setMaxExpr(new StringLiteral("2024-08"))
+                .setMaxValue(14126741017407544.000000)
+                .setAvgSizeByte(7)
+                .build();
+        Statistics stats = new StatisticsBuilder()
+                .setRowCount(404962).putColumnStatistics(ym, ymStats)
+                .build();
+
+        EqualTo predicate = new EqualTo(ym,
+                new Left(new 
org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"),
+                        new IntegerLiteral(7))
+        );
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics outStats = filterEstimation.estimate(predicate, stats);
+        Assertions.assertEquals(rowCount * 
FilterEstimation.DEFAULT_INEQUALITY_COEFFICIENT,
+                outStats.getRowCount());
+    }
+
+    @Test
+    public void unknownEqualToUnknown() {
+        SlotReference ym = new SlotReference("a", new VarcharType(7));
+        ColumnStatistic ymStats = ColumnStatistic.UNKNOWN;
+        double rowCount = 404962.0;
+        Statistics stats = new StatisticsBuilder()
+                .setRowCount(rowCount).putColumnStatistics(ym, ymStats)
+                .build();
+
+        EqualTo predicate = new EqualTo(ym,
+                new Left(new 
org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"),
+                        new IntegerLiteral(7))
+        );
+        FilterEstimation filterEstimation = new FilterEstimation();
+        Statistics outStats = filterEstimation.estimate(predicate, stats);
+        Assertions.assertEquals(rowCount * 
FilterEstimation.DEFAULT_INEQUALITY_COEFFICIENT,
+                outStats.getRowCount());
+    }
+
     // a > 500 and b < 100 or a = c
     @Test
     public void test1() {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to