This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 6f6054d37eb [fix](nereids)filter estimation for slot=unknown (#39592) 6f6054d37eb is described below commit 6f6054d37eb7f7d3123b69c96761e6610325a215 Author: minghong <engle...@gmail.com> AuthorDate: Mon Aug 26 11:39:45 2024 +0800 [fix](nereids)filter estimation for slot=unknown (#39592) ## Proposed changes Detect a new pattern: slot = unknown (an equality predicate whose right-hand side has unknown column statistics). Suppose slot.ndv = 5 and slot.row = 100: the expected filter result is 20 rows, but on master the result is 0. Issue Number: close #xxx <!--Describe your changes.--> --- .../doris/nereids/stats/FilterEstimation.java | 26 ++++++-- .../doris/nereids/stats/FilterEstimationTest.java | 75 ++++++++++++++++++++++ 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index d44e6198170..5569580d29b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -331,14 +331,28 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo ColumnStatistic statsForRight, EstimationContext context) { double selectivity; - double ndv = statsForLeft.ndv; - double val = statsForRight.maxValue; - if (val > statsForLeft.maxValue || val < statsForLeft.minValue) { - selectivity = 0.0; + if (statsForLeft.isUnKnown) { + selectivity = DEFAULT_INEQUALITY_COEFFICIENT; } else { - selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv); + double ndv = statsForLeft.ndv; + if (statsForRight.isUnKnown) { + if (ndv >= 1.0) { + selectivity = 1.0 / ndv; + } else { + selectivity = DEFAULT_INEQUALITY_COEFFICIENT; + } + } else { + double val = statsForRight.maxValue; + if (val > statsForLeft.maxValue || val < statsForLeft.minValue) { + selectivity = 0.0; + } else if (ndv >= 1.0) { + selectivity = 
StatsMathUtil.minNonNaN(1.0, 1.0 / ndv); + } else { + selectivity = DEFAULT_INEQUALITY_COEFFICIENT; + } + selectivity = getNotNullSelectivity(statsForLeft, selectivity); + } } - selectivity = getNotNullSelectivity(statsForLeft, selectivity); Statistics equalStats = context.statistics.withSel(selectivity); Expression left = cp.left(); equalStats.addColumnStats(left, statsForRight); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java index dd5a38a4a62..d7c44e082cf 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java @@ -33,6 +33,7 @@ import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.NullSafeEqual; import org.apache.doris.nereids.trees.expressions.Or; import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.scalar.Left; import org.apache.doris.nereids.trees.expressions.literal.DateLiteral; import org.apache.doris.nereids.trees.expressions.literal.DoubleLiteral; import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; @@ -177,6 +178,80 @@ class FilterEstimationTest { Assertions.assertEquals(10, aStatsEst.ndv); } + @Test + public void knownEqualToUnknown() { + SlotReference ym = new SlotReference("a", new VarcharType(7)); + double rowCount = 404962.0; + double ndv = 14.0; + ColumnStatistic ymStats = new ColumnStatisticBuilder() + .setCount(rowCount) + .setNdv(ndv) + .setMinExpr(new StringLiteral("2023-07")) + .setMinValue(14126741000630328.000000) + .setMaxExpr(new StringLiteral("2024-08")) + .setMaxValue(14126741017407544.000000) + .setAvgSizeByte(7) + .build(); + Statistics stats = new StatisticsBuilder() + .setRowCount(404962).putColumnStatistics(ym, ymStats) + .build(); + + 
EqualTo predicate = new EqualTo(ym, + new Left(new org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"), + new IntegerLiteral(7)) + ); + FilterEstimation filterEstimation = new FilterEstimation(); + Statistics outStats = filterEstimation.estimate(predicate, stats); + Assertions.assertEquals(rowCount / ndv, outStats.getRowCount()); + } + + @Test + public void knownEqualToUnknownWithLittleNdv() { + SlotReference ym = new SlotReference("a", new VarcharType(7)); + double rowCount = 404962.0; + double ndv = 0.5; + ColumnStatistic ymStats = new ColumnStatisticBuilder() + .setCount(rowCount) + .setNdv(ndv) + .setMinExpr(new StringLiteral("2023-07")) + .setMinValue(14126741000630328.000000) + .setMaxExpr(new StringLiteral("2024-08")) + .setMaxValue(14126741017407544.000000) + .setAvgSizeByte(7) + .build(); + Statistics stats = new StatisticsBuilder() + .setRowCount(404962).putColumnStatistics(ym, ymStats) + .build(); + + EqualTo predicate = new EqualTo(ym, + new Left(new org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"), + new IntegerLiteral(7)) + ); + FilterEstimation filterEstimation = new FilterEstimation(); + Statistics outStats = filterEstimation.estimate(predicate, stats); + Assertions.assertEquals(rowCount * FilterEstimation.DEFAULT_INEQUALITY_COEFFICIENT, + outStats.getRowCount()); + } + + @Test + public void unknownEqualToUnknown() { + SlotReference ym = new SlotReference("a", new VarcharType(7)); + ColumnStatistic ymStats = ColumnStatistic.UNKNOWN; + double rowCount = 404962.0; + Statistics stats = new StatisticsBuilder() + .setRowCount(rowCount).putColumnStatistics(ym, ymStats) + .build(); + + EqualTo predicate = new EqualTo(ym, + new Left(new org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-08-14"), + new IntegerLiteral(7)) + ); + FilterEstimation filterEstimation = new FilterEstimation(); + Statistics outStats = filterEstimation.estimate(predicate, stats); + 
Assertions.assertEquals(rowCount * FilterEstimation.DEFAULT_INEQUALITY_COEFFICIENT, + outStats.getRowCount()); + } + // a > 500 and b < 100 or a = c @Test public void test1() { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org