This is an automated email from the ASF dual-hosted git repository.

huajianlan pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b15e2ddeaa [fix](Nereids): fix and enable stats derive job (#11755) b15e2ddeaa is described below commit b15e2ddeaa07460973193545d47889fc186a458a Author: jakevin <jakevin...@gmail.com> AuthorDate: Thu Aug 18 21:26:35 2022 +0800 [fix](Nereids): fix and enable stats derive job (#11755) fix and enable statistics derive job Add mock for statistics in computeScan(). --- .../org/apache/doris/nereids/NereidsPlanner.java | 10 +- .../java/org/apache/doris/nereids/PlanContext.java | 28 ++---- .../apache/doris/nereids/cost/CostCalculator.java | 10 +- .../apache/doris/nereids/cost/CostEstimate.java | 10 ++ .../doris/nereids/jobs/cascades/ApplyRuleJob.java | 3 +- .../nereids/jobs/cascades/CostAndEnforcerJob.java | 4 +- .../nereids/stats/FilterSelectivityCalculator.java | 30 ++---- .../apache/doris/nereids/stats/JoinEstimation.java | 2 + .../doris/nereids/stats/StatsCalculator.java | 26 +++++- .../java/org/apache/doris/qe/SessionVariable.java | 13 --- .../doris/nereids/jobs/CostAndEnforcerJobTest.java | 102 --------------------- 11 files changed, 62 insertions(+), 176 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index d0dc24cc12..a0a24f9f66 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -43,7 +43,6 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; import org.apache.doris.planner.PlanFragment; import org.apache.doris.planner.Planner; import org.apache.doris.planner.ScanNode; -import org.apache.doris.qe.ConnectContext; import com.google.common.collect.Lists; @@ -112,10 +111,7 @@ public class NereidsPlanner extends Planner { // rule-based optimize rewrite(); - // TODO: remove this condition, when stats collector is fully developed. 
- if (ConnectContext.get().getSessionVariable().isEnableNereidsCBO()) { - deriveStats(); - } + deriveStats(); // TODO: What is the appropriate time to set physical properties? Maybe before enter. // cascades style optimize phase. @@ -154,7 +150,9 @@ public class NereidsPlanner extends Planner { } private void deriveStats() { - new DeriveStatsJob(getRoot().getLogicalExpression(), cascadesContext.getCurrentJobContext()).execute(); + cascadesContext + .pushJob(new DeriveStatsJob(getRoot().getLogicalExpression(), cascadesContext.getCurrentJobContext())); + cascadesContext.getJobScheduler().executeJobPool(cascadesContext); } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java index 9d352d7027..d68067bc96 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java @@ -40,17 +40,12 @@ import java.util.List; public class PlanContext { // array of children's derived stats private final List<StatsDeriveResult> childrenStats = Lists.newArrayList(); - // statistics of attached plan/gexpr - private StatsDeriveResult statistics; - // attached plan - private Plan plan; // attached group expression - private GroupExpression groupExpression; - - public PlanContext(Plan plan) { - this.plan = plan; - } + private final GroupExpression groupExpression; + /** + * Constructor for PlanContext. 
+ */ public PlanContext(GroupExpression groupExpression) { this.groupExpression = groupExpression; @@ -59,10 +54,6 @@ public class PlanContext { } } - public Plan getPlan() { - return plan; - } - public GroupExpression getGroupExpression() { return groupExpression; } @@ -71,21 +62,14 @@ public class PlanContext { return childrenStats; } - public StatsDeriveResult getStatistics() { - return statistics; - } - - public void setStatistics(StatsDeriveResult stats) { - this.statistics = stats; - } - public StatsDeriveResult getStatisticsWithCheck() { + StatsDeriveResult statistics = groupExpression.getOwnerGroup().getStatistics(); Preconditions.checkNotNull(statistics); return statistics; } public LogicalProperties childLogicalPropertyAt(int index) { - return plan.child(index).getLogicalProperties(); + return groupExpression.child(index).getLogicalProperties(); } public List<Slot> getChildOutputSlots(int index) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java index d90970f7f4..f697e934ed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java @@ -45,12 +45,10 @@ public class CostCalculator { * Constructor. */ public static double calculateCost(GroupExpression groupExpression) { - // TODO: Enable following code after enable stats derive. 
- // PlanContext planContext = new PlanContext(groupExpression); - // CostEstimator costCalculator = new CostEstimator(); - // CostEstimate costEstimate = groupExpression.getPlan().accept(costCalculator, planContext); - // return costFormula(costEstimate); - return 0; + PlanContext planContext = new PlanContext(groupExpression); + CostEstimator costCalculator = new CostEstimator(); + CostEstimate costEstimate = groupExpression.getPlan().accept(costCalculator, planContext); + return costFormula(costEstimate); } private static double costFormula(CostEstimate costEstimate) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostEstimate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostEstimate.java index af2002e4cb..da60840877 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostEstimate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostEstimate.java @@ -35,6 +35,16 @@ public final class CostEstimate { * Constructor of CostEstimate. */ public CostEstimate(double cpuCost, double memoryCost, double networkCost) { + // TODO: remove them after finish statistics. 
+ if (cpuCost < 0) { + cpuCost = 0; + } + if (memoryCost < 0) { + memoryCost = 0; + } + if (networkCost < 0) { + networkCost = 0; + } Preconditions.checkArgument(!(cpuCost < 0), "cpuCost cannot be negative: %s", cpuCost); Preconditions.checkArgument(!(memoryCost < 0), "memoryCost cannot be negative: %s", memoryCost); Preconditions.checkArgument(!(networkCost < 0), "networkCost cannot be negative: %s", networkCost); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java index 781af4aa0d..2b395141ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/ApplyRuleJob.java @@ -71,12 +71,13 @@ public class ApplyRuleJob extends Job { GroupExpression newGroupExpression = pair.second; if (newPlan instanceof LogicalPlan) { - pushTask(new DeriveStatsJob(newGroupExpression, context)); if (exploredOnly) { pushTask(new ExploreGroupExpressionJob(newGroupExpression, context)); + pushTask(new DeriveStatsJob(newGroupExpression, context)); continue; } pushTask(new OptimizeGroupExpressionJob(newGroupExpression, context)); + pushTask(new DeriveStatsJob(newGroupExpression, context)); } else { pushTask(new CostAndEnforcerJob(newGroupExpression, context)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java index f52dd4f2d8..5cd0096a09 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/cascades/CostAndEnforcerJob.java @@ -172,12 +172,12 @@ public class CostAndEnforcerJob extends Job implements Cloneable { } /* update current group statistics and re-compute costs. 
*/ - if (groupExpression.children().stream().anyMatch(group -> group.getStatistics() != null)) { + if (groupExpression.children().stream().anyMatch(group -> group.getStatistics() == null)) { return; } PlanContext planContext = new PlanContext(groupExpression); // TODO: calculate stats. ?????? - groupExpression.getOwnerGroup().setStatistics(planContext.getStatistics()); + groupExpression.getOwnerGroup().setStatistics(planContext.getStatisticsWithCheck()); enforce(outputProperty, requestChildrenProperty); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java index f4daf9f8b8..0953b8ffbb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterSelectivityCalculator.java @@ -21,14 +21,11 @@ import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; import org.apache.doris.nereids.trees.expressions.CompoundPredicate; import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.GreaterThan; -import org.apache.doris.nereids.trees.expressions.GreaterThanEqual; -import org.apache.doris.nereids.trees.expressions.LessThan; import org.apache.doris.nereids.trees.expressions.Literal; import org.apache.doris.nereids.trees.expressions.Or; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; -import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionVisitor; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.statistics.ColumnStats; import com.google.common.base.Preconditions; @@ -38,7 +35,7 @@ import java.util.Map; /** * Calculate selectivity of the filter. 
*/ -public class FilterSelectivityCalculator extends DefaultExpressionVisitor<Double, Void> { +public class FilterSelectivityCalculator extends ExpressionVisitor<Double, Void> { private static double DEFAULT_SELECTIVITY = 0.1; @@ -63,13 +60,19 @@ public class FilterSelectivityCalculator extends DefaultExpressionVisitor<Double return expression.accept(this, null); } + + @Override + public Double visit(Expression expr, Void context) { + return DEFAULT_SELECTIVITY; + } + @Override public Double visitCompoundPredicate(CompoundPredicate compoundPredicate, Void context) { Expression leftExpr = compoundPredicate.child(0); Expression rightExpr = compoundPredicate.child(1); double leftSel = 1; double rightSel = 1; - leftSel = estimate(leftExpr); + leftSel = estimate(leftExpr); rightSel = estimate(rightExpr); return compoundPredicate instanceof Or ? leftSel + rightSel - leftSel * rightSel : leftSel * rightSel; } @@ -92,19 +95,4 @@ public class FilterSelectivityCalculator extends DefaultExpressionVisitor<Double } // TODO: Should consider the distribution of data. 
- @Override - public Double visitGreaterThan(GreaterThan greaterThan, Void context) { - return DEFAULT_SELECTIVITY; - } - - @Override - public Double visitGreaterThanEqual(GreaterThanEqual greaterThanEqual, Void context) { - return DEFAULT_SELECTIVITY; - } - - @Override - public Double visitLessThan(LessThan lessThan, Void context) { - return DEFAULT_SELECTIVITY; - } - } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java index ca141471c8..1592da17ab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java @@ -129,6 +129,7 @@ public class JoinEstimation { if (lhsCard == -1 || rhsCard == -1) { return lhsCard; } + long result = -1; for (Expression eqJoinConjunct : eqConjunctList) { Expression left = eqJoinConjunct.child(0); @@ -160,6 +161,7 @@ public class JoinEstimation { result = Math.min(result, joinCard); } } + return result; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index dcd33f9711..a5fee61259 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -17,7 +17,9 @@ package org.apache.doris.nereids.stats; -import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.Partition; +import org.apache.doris.common.AnalysisException; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; @@ -52,6 +54,7 @@ import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; import 
org.apache.doris.nereids.util.Utils; import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.ColumnStats; +import org.apache.doris.statistics.Statistics; import org.apache.doris.statistics.StatsDeriveResult; import org.apache.doris.statistics.TableStats; @@ -194,9 +197,9 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void> // 2. Consider the influence of runtime filter // 3. Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now. private StatsDeriveResult computeScan(Scan scan) { - Table table = scan.getTable(); TableStats tableStats = Utils.execWithReturnVal(() -> - ConnectContext.get().getEnv().getStatisticsManager().getStatistics().getTableStats(table.getId()) + // TODO: tmp mock the table stats, after we support the table stats, we should remove this mock. + mockRowCountInStatistic(scan) ); Map<Slot, ColumnStats> slotToColumnStats = new HashMap<>(); Set<SlotReference> slotSet = scan.getOutput().stream().filter(SlotReference.class::isInstance) @@ -216,6 +219,23 @@ public class StatsCalculator extends DefaultPlanVisitor<StatsDeriveResult, Void> return stats; } + // TODO: tmp mock the table stats, after we support the table stats, we should remove this mock. 
+ private TableStats mockRowCountInStatistic(Scan scan) throws AnalysisException { + long cardinality = 0; + if (scan instanceof PhysicalOlapScan) { + PhysicalOlapScan olapScan = (PhysicalOlapScan) scan; + for (long selectedPartitionId : olapScan.getSelectedPartitionId()) { + final Partition partition = olapScan.getTable().getPartition(selectedPartitionId); + final MaterializedIndex baseIndex = partition.getBaseIndex(); + cardinality += baseIndex.getRowCount(); + } + } + Statistics statistics = ConnectContext.get().getEnv().getStatisticsManager().getStatistics(); + + statistics.mockTableStatsWithRowCount(scan.getTable().getId(), cardinality); + return statistics.getTableStats(scan.getTable().getId()); + } + private StatsDeriveResult computeTopN(TopN topN) { StatsDeriveResult stats = groupExpression.getCopyOfChildStats(0); return stats.updateRowCountByLimit(topN.getLimit()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 9017b6d5e4..40feef115f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -93,8 +93,6 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_COST_BASED_JOIN_REORDER = "enable_cost_based_join_reorder"; - public static final String ENABLE_NEREIDS_CBO = "enable_nereids_cbo"; - public static final int MIN_EXEC_INSTANCE_NUM = 1; public static final int MAX_EXEC_INSTANCE_NUM = 32; // if set to true, some of stmt will be forwarded to master FE to get result @@ -448,9 +446,6 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = ENABLE_COST_BASED_JOIN_REORDER) private boolean enableJoinReorderBasedCost = false; - @VariableMgr.VarAttr(name = ENABLE_NEREIDS_CBO) - private boolean enableNereidsCBO = false; - @VariableMgr.VarAttr(name = ENABLE_FOLD_CONSTANT_BY_BE) 
private boolean enableFoldConstantByBe = false; @@ -1019,14 +1014,6 @@ public class SessionVariable implements Serializable, Writable { this.enableJoinReorderBasedCost = enableJoinReorderBasedCost; } - public boolean isEnableNereidsCBO() { - return enableNereidsCBO; - } - - public void setEnableNereidsCBO(boolean enableNereidsCBO) { - this.enableNereidsCBO = enableNereidsCBO; - } - public void setDisableJoinReorder(boolean disableJoinReorder) { this.disableJoinReorder = disableJoinReorder; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/CostAndEnforcerJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/CostAndEnforcerJobTest.java deleted file mode 100644 index 35dd5284b7..0000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/jobs/CostAndEnforcerJobTest.java +++ /dev/null @@ -1,102 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.nereids.jobs; - -import org.apache.doris.nereids.CascadesContext; -import org.apache.doris.nereids.cost.CostCalculator; -import org.apache.doris.nereids.jobs.cascades.OptimizeGroupJob; -import org.apache.doris.nereids.memo.GroupExpression; -import org.apache.doris.nereids.trees.expressions.EqualTo; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.SlotReference; -import org.apache.doris.nereids.trees.plans.JoinType; -import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; -import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; -import org.apache.doris.nereids.util.MemoTestUtils; -import org.apache.doris.nereids.util.PlanConstructor; - -import com.google.common.collect.Lists; -import mockit.Mock; -import mockit.MockUp; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - -public class CostAndEnforcerJobTest { - /* - * topJoin - * / \ - * C bottomJoin - * / \ - * A B - */ - - private static List<LogicalOlapScan> scans = Lists.newArrayList(); - private static List<List<SlotReference>> outputs = Lists.newArrayList(); - - @BeforeAll - public static void init() { - LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "a", 0); - LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "b", 1); - LogicalOlapScan scan3 = PlanConstructor.newLogicalOlapScan(2, "c", 0); - - scans.add(scan1); - scans.add(scan2); - scans.add(scan3); - - List<SlotReference> t1Output = scan1.getOutput().stream().map(slot -> (SlotReference) slot) - .collect(Collectors.toList()); - List<SlotReference> t2Output = scan2.getOutput().stream().map(slot -> (SlotReference) slot) - .collect(Collectors.toList()); - List<SlotReference> t3Output = scan3.getOutput().stream().map(slot -> (SlotReference) slot) - .collect(Collectors.toList()); - outputs.add(t1Output); - 
outputs.add(t2Output); - outputs.add(t3Output); - } - - @Test - public void testExecute() { - new MockUp<CostCalculator>() { - @Mock - public double calculateCost(GroupExpression groupExpression) { - return 0; - } - }; - - /* - * bottomJoin - * / \ - * A B - */ - Expression bottomJoinOnCondition = new EqualTo(outputs.get(0).get(0), outputs.get(1).get(0)); - - LogicalJoin<LogicalOlapScan, LogicalOlapScan> bottomJoin = new LogicalJoin<>(JoinType.INNER_JOIN, - Optional.of(bottomJoinOnCondition), scans.get(0), scans.get(1)); - - CascadesContext cascadesContext = MemoTestUtils.createCascadesContext(bottomJoin); - cascadesContext.pushJob( - new OptimizeGroupJob( - cascadesContext.getMemo().getRoot(), - cascadesContext.getCurrentJobContext())); - cascadesContext.getJobScheduler().executeJobPool(cascadesContext); - } -} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org