This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 8debc96d74 [enhancement](nereids) update FilterEstimation and Agg in stats derive (#17790) 8debc96d74 is described below commit 8debc96d74b2a5c2c4d24d47f8df47554096333d Author: minghong <engle...@gmail.com> AuthorDate: Fri Mar 17 18:01:50 2023 +0800 [enhancement](nereids) update FilterEstimation and Agg in stats derive (#17790) * 1. update ndv in Stats, 2. skip __DORIS_DELETE_SIGN__=0 in stats derive, 3. equalTo in stats derive 4. update agg stats derive, support the case: all column_stats are unknown * computeSize * fix ut --- .../apache/doris/nereids/memo/GroupExpression.java | 16 ++--- .../doris/nereids/stats/FilterEstimation.java | 69 +++++++++++++--------- .../apache/doris/nereids/stats/JoinEstimation.java | 6 ++ .../doris/nereids/stats/StatsCalculator.java | 35 ++++++++--- .../plans/physical/PhysicalHashAggregate.java | 2 +- .../plans/physical/PhysicalNestedLoopJoin.java | 3 +- .../org/apache/doris/statistics/Statistics.java | 31 +++++++--- .../org/apache/doris/nereids/memo/RankTest.java | 3 +- .../doris/nereids/stats/FilterEstimationTest.java | 37 ++++++------ .../doris/nereids/stats/StatsCalculatorTest.java | 12 ++-- .../suites/nereids_syntax_p0/join.groovy | 27 --------- 11 files changed, 132 insertions(+), 109 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java index 5b4774284a..12e9816f8e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/GroupExpression.java @@ -57,7 +57,7 @@ public class GroupExpression { private final BitSet ruleMasks; private boolean statDerived; - private long estOutputRowCount = -1; + private double estOutputRowCount = -1; //Record the rule that generate this plan. It's used for debugging private Rule fromRule; @@ -301,16 +301,13 @@ public class GroupExpression { return new Statistics(child(idx).getStatistics()); } - public void setEstOutputRowCount(long estOutputRowCount) { + public void setEstOutputRowCount(double estOutputRowCount) { this.estOutputRowCount = estOutputRowCount; } - public long getEstOutputRowCount() { - return estOutputRowCount; - } - @Override public String toString() { + DecimalFormat format = new DecimalFormat("#,###.##"); StringBuilder builder = new StringBuilder("id:"); builder.append(id.asInt()); if (ownerGroup == null) { @@ -318,11 +315,8 @@ public class GroupExpression { } else { builder.append("#").append(ownerGroup.getGroupId().asInt()); } - - DecimalFormat decimalFormat = new DecimalFormat(); - decimalFormat.setGroupingSize(3); - builder.append(" cost=").append(decimalFormat.format((long) cost)); - builder.append(" estRows=").append(estOutputRowCount); + builder.append(" cost=").append(format.format((long) cost)); + builder.append(" estRows=").append(format.format(estOutputRowCount)); builder.append(" (plan=").append(plan.toString()).append(") children=["); for (Group group : children) { builder.append(group.getGroupId()).append(" "); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index 2aa55114d8..e2159f1040 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -31,10 +31,10 @@ import org.apache.doris.nereids.trees.expressions.LessThanEqual; import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.NullSafeEqual; import org.apache.doris.nereids.trees.expressions.Or; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; -import org.apache.doris.nereids.types.coercion.NumericType; import org.apache.doris.statistics.Bucket; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; @@ -48,6 +48,7 @@ import com.google.common.base.Preconditions; import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Set; /** * Calculate selectivity of expression that produces boolean value. @@ -85,10 +86,10 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo Statistics rightStats = rightExpr.accept(this, context); double rowCount = leftStats.getRowCount() + rightStats.getRowCount() - andStats.getRowCount(); Statistics orStats = context.statistics.withRowCount(rowCount); - for (Map.Entry<Expression, ColumnStatistic> entry : leftStats.columnStatistics().entrySet()) { - ColumnStatistic leftColStats = entry.getValue(); + for (Map.Entry<Expression, ColumnStatistic> entry : orStats.columnStatistics().entrySet()) { + ColumnStatistic leftColStats = leftStats.findColumnStatistics(entry.getKey()); ColumnStatistic rightColStats = rightStats.findColumnStatistics(entry.getKey()); - ColumnStatisticBuilder estimatedColStatsBuilder = new ColumnStatisticBuilder(leftColStats); + ColumnStatisticBuilder estimatedColStatsBuilder = new ColumnStatisticBuilder(entry.getValue()); if (leftColStats.minValue <= rightColStats.minValue) { estimatedColStatsBuilder.setMinValue(leftColStats.minValue); estimatedColStatsBuilder.setMinExpr(leftColStats.minExpr); @@ -113,7 +114,17 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo @Override public Statistics visitComparisonPredicate(ComparisonPredicate cp, EstimationContext context) { Expression left = cp.left(); + if (left instanceof SlotReference && ((SlotReference) left).getColumn().isPresent()) { + if ("__DORIS_DELETE_SIGN__".equals(((SlotReference) left).getColumn().get().getName())) { + return context.statistics; + } + } Expression right = cp.right(); + if (right instanceof SlotReference && ((SlotReference) right).getColumn().isPresent()) { + if ("__DORIS_DELETE_SIGN__".equals(((SlotReference) right).getColumn().get().getName())) { + return context.statistics; + } + } ColumnStatistic statsForLeft = ExpressionEstimation.estimate(left, context.statistics); ColumnStatistic statsForRight = ExpressionEstimation.estimate(right, context.statistics); if (!(left instanceof Literal) && !(right instanceof Literal)) { @@ -152,10 +163,6 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo if (statsForLeft == ColumnStatistic.UNKNOWN) { return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT); } - Expression rightExpr = cp.child(1); - if (!(rightExpr.getDataType() instanceof NumericType)) { - return context.statistics.withSel(DEFAULT_INEQUALITY_COEFFICIENT); - } double selectivity; double ndv = statsForLeft.ndv; double val = statsForRight.maxValue; @@ -175,7 +182,33 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo if (statsForLeft.histogram != null) { return estimateEqualToWithHistogram(cp.left(), statsForLeft, val, context); } - return context.statistics.withSel(selectivity); + // cp.left : func(A), we assume func(A) has same statistics with A + // for example: cast(N_NAME as varchar(*)) = 'GERMANY', + // we assume cast(N_NAME as varchar(*)) and N_NAME have the same col stats + Set<Slot> leftSlots = cp.left().getInputSlots(); + Preconditions.checkArgument(leftSlots.size() <= 1, + "stats derive: equal condition only support at one column, but we meet " + + leftSlots.size() + ); + + Statistics equalStats = context.statistics.withSel(selectivity); + /* + leftSlots could be empty, for example: + select * from (select 'jj' as kk1, sum(k2) from ${tableName2} where k10 = '2015-04-02' group by kk1)tt + where kk1 in ('jj') + kk1 in ('jj') => kk1 = 'jj' => 'jj'='jj + TODO const fold could eliminate this equalTo. + */ + if (!leftSlots.isEmpty()) { + Slot leftSlot = leftSlots.iterator().next(); + //update min/max of cp.left + ColumnStatistic columnStats = equalStats.findColumnStatistics(leftSlot); + ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(columnStats); + colStatsBuilder.setMaxValue(val); + colStatsBuilder.setMinValue(val); + equalStats.addColumnStats(leftSlot, colStatsBuilder.build()); + } + return equalStats; } else { if (cp instanceof LessThan || cp instanceof LessThanEqual) { if (context.isNot) { @@ -238,7 +271,6 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo A.selectivity = 7/10 */ double validInOptCount = 0; - double columnSelectivity = 1.0; double selectivity = 1.0; ColumnStatisticBuilder compareExprStatsBuilder = new ColumnStatisticBuilder(compareExprStats); if (isNotIn) { @@ -250,7 +282,6 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo } } validInOptCount = Math.max(1, compareExprStats.ndv - validInOptCount); - columnSelectivity = compareExprStats.ndv == 0 ? 0 : Math.max(1, validInOptCount) / compareExprStats.ndv; } else { for (Expression option : options) { ColumnStatistic optionStats = ExpressionEstimation.estimate(option, context.statistics); @@ -263,29 +294,13 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo } maxOption = Math.min(maxOption, compareExprStats.maxValue); minOption = Math.max(minOption, compareExprStats.minValue); - if (maxOption == minOption) { - columnSelectivity = 1.0; - } else { - double outputRange = maxOption - minOption; - double originRange = Math.max(1, compareExprStats.maxValue - compareExprStats.minValue); - double orginDensity = StatsMathUtil.minNonNaN(1, - compareExprStats.ndv / StatsMathUtil.nonZeroDivisor(originRange)); - double outputDensity = StatsMathUtil.minNonNaN(1, - validInOptCount / StatsMathUtil.nonZeroDivisor(outputRange)); - columnSelectivity = StatsMathUtil.minNonNaN(1, outputDensity - / StatsMathUtil.nonZeroDivisor(orginDensity)); - } compareExprStatsBuilder.setMaxValue(maxOption); compareExprStatsBuilder.setMinValue(minOption); } selectivity = StatsMathUtil.minNonNaN(1.0, validInOptCount / compareExprStats.ndv); - - compareExprStatsBuilder.setSelectivity(compareExprStats.selectivity * columnSelectivity); compareExprStatsBuilder.setNdv(validInOptCount); - Statistics estimated = new Statistics(context.statistics); - estimated = estimated.withSel(selectivity); if (compareExpr instanceof SlotReference) { estimated.addColumnStats(compareExpr, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java index c1e30f1da7..e77c060bba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java @@ -21,6 +21,7 @@ import org.apache.doris.common.Pair; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.plans.JoinType; import org.apache.doris.nereids.trees.plans.algebra.Join; +import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.statistics.Statistics; import org.apache.doris.statistics.StatisticsBuilder; @@ -70,6 +71,11 @@ public class JoinEstimation { .build(); List<Expression> joinConditions = join.getHashJoinConjuncts(); Statistics innerJoinStats = estimateInnerJoin(crossJoinStats, joinConditions); + if (!join.getOtherJoinConjuncts().isEmpty()) { + FilterEstimation filterEstimation = new FilterEstimation(); + innerJoinStats = filterEstimation.estimate( + ExpressionUtils.and(join.getOtherJoinConjuncts()), innerJoinStats); + } innerJoinStats.setWidth(leftStats.getWidth() + rightStats.getWidth()); innerJoinStats.setPenalty(0); double rowCount; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 5e196a4441..8f10d6f4ea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -106,6 +106,7 @@ import java.util.stream.Collectors; * Used to calculate the stats for each plan */ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { + public static double DEFAULT_AGGREGATE_RATIO = 0.5; private final GroupExpression groupExpression; private StatsCalculator(GroupExpression groupExpression) { @@ -130,7 +131,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { if (originStats == null || originStats.getRowCount() > stats.getRowCount()) { groupExpression.getOwnerGroup().setStatistics(stats); } - groupExpression.setEstOutputRowCount((long) stats.getRowCount()); + groupExpression.setEstOutputRowCount(stats.getRowCount()); groupExpression.setStatDerived(true); } @@ -436,12 +437,32 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> { // TODO: since we have no column stats here. just use a fix ratio to compute the row count. List<Expression> groupByExpressions = aggregate.getGroupByExpressions(); Statistics childStats = groupExpression.childStatistics(0); - Map<Expression, ColumnStatistic> childSlotToColumnStats = childStats.columnStatistics(); - double resultSetCount = groupByExpressions.stream().flatMap(expr -> expr.getInputSlots().stream()) - .filter(childSlotToColumnStats::containsKey).map(childSlotToColumnStats::get).map(s -> s.ndv) - .reduce(1d, (a, b) -> a * b); - if (resultSetCount <= 0) { - resultSetCount = 1L; + double resultSetCount = 1; + if (!groupByExpressions.isEmpty()) { + Map<Expression, ColumnStatistic> childSlotToColumnStats = childStats.columnStatistics(); + double inputRowCount = childStats.getRowCount(); + if (inputRowCount == 0) { + //on empty relation, Agg output 1 tuple + resultSetCount = 1; + } else { + List<ColumnStatistic> groupByKeyStats = groupByExpressions.stream() + .flatMap(expr -> expr.getInputSlots().stream()) + .map(Slot::getExprId) + .filter(childSlotToColumnStats::containsKey) + .map(childSlotToColumnStats::get) + .filter(s -> !s.isUnKnown) + .collect(Collectors.toList()); + if (groupByKeyStats.isEmpty()) { + //all column stats are unknown, use default ratio + resultSetCount = inputRowCount * DEFAULT_AGGREGATE_RATIO; + } else { + resultSetCount = groupByKeyStats.stream() + .map(s -> s.ndv) + .reduce(1.0, (a, b) -> a * b); + //agg output tuples should be less than input tuples + resultSetCount = Math.min(resultSetCount, inputRowCount); + } + } } resultSetCount = Math.min(resultSetCount, childStats.getRowCount()); Map<Expression, ColumnStatistic> slotToColumnStats = Maps.newHashMap(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java index 3da6bf2d9d..1d8cdbf71f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java @@ -257,7 +257,7 @@ public class PhysicalHashAggregate<CHILD_TYPE extends Plan> extends PhysicalUnar @Override public PhysicalHashAggregate<CHILD_TYPE> withAggOutput(List<NamedExpression> newOutput) { return new PhysicalHashAggregate<>(groupByExpressions, newOutput, partitionExpressions, - aggregateParam, maybeUsingStream, groupExpression, getLogicalProperties(), + aggregateParam, maybeUsingStream, Optional.empty(), getLogicalProperties(), requireProperties, physicalProperties, statistics, child()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java index c6e263c99a..29e92b5038 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java @@ -119,7 +119,8 @@ public class PhysicalNestedLoopJoin< "type", joinType, "otherJoinCondition", otherJoinConjuncts, "isMarkJoin", markJoinSlotReference.isPresent(), - "markJoinSlotReference", markJoinSlotReference.isPresent() ? markJoinSlotReference.get() : "empty" + "markJoinSlotReference", markJoinSlotReference.isPresent() ? markJoinSlotReference.get() : "empty", + "stats", statistics ); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index 048f342d89..b9cf6040e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -20,12 +20,12 @@ package org.apache.doris.statistics; import org.apache.doris.nereids.stats.StatsMathUtil; import org.apache.doris.nereids.trees.expressions.Expression; +import java.text.DecimalFormat; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; public class Statistics { - private final double rowCount; private final Map<Expression, ColumnStatistic> expressionToColumnStats; @@ -38,6 +38,19 @@ public class Statistics { @Deprecated private double penalty; + /** + * after filter, compute the new ndv of a column + * @param ndv original ndv of column + * @param newRowCount the row count of table after filter + * @param oldRowCount the row count of table before filter + * @return the new ndv after filter + */ + public static double computeNdv(double ndv, double newRowCount, double oldRowCount) { + double selectOneTuple = newRowCount / StatsMathUtil.nonZeroDivisor(oldRowCount); + double allTuplesOfSameDistinctValueNotSelected = Math.pow((1 - selectOneTuple), oldRowCount / ndv); + return Math.min(ndv * (1 - allTuplesOfSameDistinctValueNotSelected), newRowCount); + } + public Statistics(Statistics another) { this.rowCount = another.rowCount; this.expressionToColumnStats = new HashMap<>(another.expressionToColumnStats); @@ -72,17 +85,18 @@ public class Statistics { public Statistics withRowCount(double rowCount) { Statistics statistics = new Statistics(rowCount, new HashMap<>(expressionToColumnStats), width, penalty); - statistics.fix(rowCount / StatsMathUtil.nonZeroDivisor(this.rowCount)); + statistics.fix(rowCount, StatsMathUtil.nonZeroDivisor(this.rowCount)); return statistics; } - public void fix(double sel) { + public void fix(double newRowCount, double originRowCount) { + double sel = newRowCount / originRowCount; for (Entry<Expression, ColumnStatistic> entry : expressionToColumnStats.entrySet()) { ColumnStatistic columnStatistic = entry.getValue(); ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(columnStatistic); - columnStatisticBuilder.setNdv(Math.min(Math.ceil(columnStatistic.ndv * sel), rowCount)); - columnStatisticBuilder.setNumNulls(Math.min(Math.ceil(columnStatistic.numNulls * sel), rowCount)); - columnStatisticBuilder.setCount(Math.min(Math.ceil(columnStatistic.count * sel), rowCount)); + columnStatisticBuilder.setNdv(computeNdv(columnStatistic.ndv, newRowCount, originRowCount)); + columnStatisticBuilder.setNumNulls(Math.min(columnStatistic.numNulls * sel, rowCount)); + columnStatisticBuilder.setCount(newRowCount); expressionToColumnStats.put(entry.getKey(), columnStatisticBuilder.build()); } } @@ -105,7 +119,7 @@ public class Statistics { public double computeSize() { if (computeSize <= 0) { computeSize = Math.max(1, expressionToColumnStats.values().stream() - .map(s -> s.dataSize).reduce(0D, Double::sum) + .map(s -> s.avgSizeByte).reduce(0D, Double::sum) ) * rowCount; } return computeSize; @@ -113,7 +127,8 @@ public class Statistics { @Override public String toString() { - return String.format("rows=%.4f", rowCount); + DecimalFormat format = new DecimalFormat("#,###.##"); + return format.format(rowCount); } public void setWidth(double width) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java index 70fe91afc9..977ba7beb2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/RankTest.java @@ -55,12 +55,13 @@ public class RankTest extends TPCHTestBase { .optimize() .getCascadesContext() .getMemo(); - PhysicalPlan plan1 = memo.unrank(memo.rank(1).first); PhysicalPlan plan2 = PlanChecker.from(connectContext) .analyze(field.get(null).toString()) .rewrite() .optimize() .getBestPlanTree(PhysicalProperties.GATHER); + PhysicalPlan plan1 = memo.unrank(memo.rank(1).first); + Assertions.assertTrue(PlanChecker.isPlanEqualWithoutID(plan1, plan2)); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java index 3992e2de9a..691cf53720 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java @@ -197,8 +197,7 @@ class FilterEstimationTest { Statistics stat = new Statistics(1000, slotToColumnStat); FilterEstimation filterEstimation = new FilterEstimation(); Statistics expected = filterEstimation.estimate(or, stat); - Assertions.assertTrue( - Precision.equals(50, expected.getRowCount(), 0.01)); + Assertions.assertEquals(51, expected.getRowCount(), 0.1); } // a > 500 and b < 100 or a > c @@ -450,9 +449,9 @@ class FilterEstimationTest { Assertions.assertEquals(1000 * 7.0 / 10.0, estimated.getRowCount()); } - //c>100 + // c>100 // a is primary-key, a.ndv is reduced - // b is normal, b.ndv is not changed + // b is normal, b.ndv is smaller: newNdv = ndv * (1 - Math.pow(1 - selectivity, rowCount / ndv)); // c.selectivity is still 1, but its range becomes half @Test public void test12() { @@ -466,8 +465,8 @@ class FilterEstimationTest { .setNdv(1000) .setAvgSizeByte(4) .setNumNulls(0) - .setMinValue(10000) - .setMaxValue(1000) + .setMinValue(1000) + .setMaxValue(10000) .setSelectivity(1.0); ColumnStatisticBuilder builderB = new ColumnStatisticBuilder() .setNdv(100) @@ -492,7 +491,7 @@ class FilterEstimationTest { ColumnStatistic statsA = estimated.findColumnStatistics(a); Assertions.assertEquals(500, statsA.ndv); ColumnStatistic statsB = estimated.findColumnStatistics(b); - Assertions.assertEquals(50, statsB.ndv); + Assertions.assertEquals(100, statsB.ndv, 0.1); ColumnStatistic statsC = estimated.findColumnStatistics(c); Assertions.assertEquals(50, statsC.ndv); Assertions.assertEquals(100, statsC.minValue); @@ -502,9 +501,10 @@ class FilterEstimationTest { /** * test filter estimation, like 20>c>10, c in (0,40) * filter range has intersection with (c.min, c.max) - * a primary key, a.ndv reduced by 1/4, a.selectivity=0.25 - * b normal field, b.ndv not changed, b.selectivity=1.0 - * c.ndv = 10/40 * c.ndv, c.selectivity=1 + * rows = 100 + * a primary key, a.ndv reduced by 1/4 + * b normal field, b.ndv=20 => + * c.ndv = 10/40 * c.ndv */ @Test public void testFilterInsideMinMax() { @@ -547,13 +547,13 @@ class FilterEstimationTest { Statistics estimated = filterEstimation.estimate(and, stat); Assertions.assertEquals(25, estimated.getRowCount()); ColumnStatistic statsA = estimated.findColumnStatistics(a); - Assertions.assertEquals(25, statsA.ndv); + Assertions.assertEquals(25, statsA.ndv, 0.1); //Assertions.assertEquals(0.25, statsA.selectivity); Assertions.assertEquals(0, statsA.minValue); Assertions.assertEquals(100, statsA.maxValue); ColumnStatistic statsB = estimated.findColumnStatistics(b); - Assertions.assertEquals(5, statsB.ndv); + Assertions.assertEquals(15.6, statsB.ndv, 0.1); Assertions.assertEquals(0, statsB.minValue); Assertions.assertEquals(500, statsB.maxValue); Assertions.assertEquals(1.0, statsB.selectivity); @@ -686,10 +686,10 @@ class FilterEstimationTest { ColumnStatistic statsA = estimated.findColumnStatistics(a); ColumnStatistic statsB = estimated.findColumnStatistics(b); ColumnStatistic statsC = estimated.findColumnStatistics(c); - Assertions.assertEquals(5, statsA.ndv); + Assertions.assertEquals(5, statsA.ndv, 0.1); Assertions.assertEquals(0, statsA.minValue); Assertions.assertEquals(100, statsA.maxValue); - Assertions.assertEquals(1, statsB.ndv); + Assertions.assertEquals(4.5, statsB.ndv, 0.1); Assertions.assertEquals(0, statsB.minValue); Assertions.assertEquals(500, statsB.maxValue); Assertions.assertEquals(2, statsC.ndv); @@ -763,10 +763,10 @@ class FilterEstimationTest { System.out.println(statsA); System.out.println(statsB); System.out.println(statsC); - Assertions.assertEquals(5, statsA.ndv); + Assertions.assertEquals(5, statsA.ndv, 0.1); Assertions.assertEquals(0, statsA.minValue); Assertions.assertEquals(100, statsA.maxValue); - Assertions.assertEquals(1, statsB.ndv); + Assertions.assertEquals(4.5, statsB.ndv, 0.1); Assertions.assertEquals(0, statsB.minValue); Assertions.assertEquals(500, statsB.maxValue); Assertions.assertEquals(2, statsC.ndv); @@ -832,13 +832,10 @@ class FilterEstimationTest { ColumnStatistic statsA = estimated.findColumnStatistics(a); ColumnStatistic statsB = estimated.findColumnStatistics(b); ColumnStatistic statsC = estimated.findColumnStatistics(c); - System.out.println(statsA); - System.out.println(statsB); - System.out.println(statsC); Assertions.assertEquals(75, statsA.ndv); Assertions.assertEquals(0, statsA.minValue); Assertions.assertEquals(100, statsA.maxValue); - Assertions.assertEquals(15, statsB.ndv); + Assertions.assertEquals(19.9, statsB.ndv, 0.1); Assertions.assertEquals(0, statsB.minValue); Assertions.assertEquals(500, statsB.maxValue); Assertions.assertEquals(30, statsC.ndv); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java index ddccd7eddc..14501cdd91 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/StatsCalculatorTest.java @@ -145,14 +145,14 @@ public class StatsCalculatorTest { Group ownerGroup = newGroup(); groupExpression.setOwnerGroup(ownerGroup); StatsCalculator.estimate(groupExpression); - Assertions.assertEquals((long) 500, ownerGroup.getStatistics().getRowCount(), 0.001); + Assertions.assertEquals((10000 * 0.1 * 0.05), ownerGroup.getStatistics().getRowCount(), 0.001); LogicalFilter<GroupPlan> logicalFilterOr = new LogicalFilter<>(or, groupPlan); GroupExpression groupExpressionOr = new GroupExpression(logicalFilterOr, ImmutableList.of(childGroup)); Group ownerGroupOr = newGroup(); groupExpressionOr.setOwnerGroup(ownerGroupOr); StatsCalculator.estimate(groupExpressionOr); - Assertions.assertEquals((long) 1000, + Assertions.assertEquals((long) (10000 * (0.1 + 0.05 - 0.1 * 0.05)), ownerGroupOr.getStatistics().getRowCount(), 0.001); } @@ -292,8 +292,8 @@ public class StatsCalculatorTest { Statistics limitStats = ownerGroup.getStatistics(); Assertions.assertEquals(1, limitStats.getRowCount()); ColumnStatistic slot1Stats = limitStats.columnStatistics().get(slot1); - Assertions.assertEquals(1, slot1Stats.ndv); - Assertions.assertEquals(1, slot1Stats.numNulls); + Assertions.assertEquals(1, slot1Stats.ndv, 0.1); + Assertions.assertEquals(0.5, slot1Stats.numNulls); } @Test @@ -322,7 +322,7 @@ public class StatsCalculatorTest { Statistics topNStats = ownerGroup.getStatistics(); Assertions.assertEquals(1, topNStats.getRowCount()); ColumnStatistic slot1Stats = topNStats.columnStatistics().get(slot1); - Assertions.assertEquals(1, slot1Stats.ndv); - Assertions.assertEquals(1, slot1Stats.numNulls); + Assertions.assertEquals(1, slot1Stats.ndv, 0.1); + Assertions.assertEquals(0.5, slot1Stats.numNulls); } } diff --git a/regression-test/suites/nereids_syntax_p0/join.groovy b/regression-test/suites/nereids_syntax_p0/join.groovy index 982c4d8316..9bda506a3d 100644 --- a/regression-test/suites/nereids_syntax_p0/join.groovy +++ b/regression-test/suites/nereids_syntax_p0/join.groovy @@ -204,33 +204,6 @@ suite("join") { insert into outerjoin_D values( 1 ); """ - def explainStr = - sql(""" explain SELECT count(1) - FROM - (SELECT sub1.wtid, - count(*) - FROM - (SELECT a.wtid , - a.wfid - FROM test_table_b a ) sub1 - INNER JOIN [shuffle] - (SELECT a.wtid, - a.wfid - FROM test_table_a a ) sub2 - ON sub1.wtid = sub2.wtid - AND sub1.wfid = sub2.wfid - GROUP BY sub1.wtid ) qqqq;""").toString() - logger.info(explainStr) - assertTrue( - //if analyze finished - explainStr.contains("VAGGREGATE (update serialize)") && explainStr.contains("VAGGREGATE (merge finalize)") - && explainStr.contains("wtid[#8] = wtid[#3]") && explainStr.contains("projections: wtid[#5], wfid[#6]") - || - //analyze not finished - explainStr.contains("VAGGREGATE (update finalize)") && explainStr.contains("VAGGREGATE (update finalize)") - && explainStr.contains("VEXCHANGE") && explainStr.contains("VHASH JOIN") - ) - test { sql"""select * from test_table_a a cross join test_table_b b on a.wtid > b.wtid""" check{result, exception, startTime, endTime -> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org