This is an automated email from the ASF dual-hosted git repository. lingmiao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 2bd8b0713a [feature](Nereids): cost and stats; (#10199) 2bd8b0713a is described below commit 2bd8b0713ad3f6dcd950da994543974e64f00e09 Author: jakevin <30525741+jackwe...@users.noreply.github.com> AuthorDate: Wed Jun 22 22:46:22 2022 +0800 [feature](Nereids): cost and stats; (#10199) feature: cost and stats; + Add cost model data struct. + Add operator visitor for calculating cost. + Add plan context. --- .../org/apache/doris/nereids/OperatorVisitor.java | 62 ++++++++++++ .../java/org/apache/doris/nereids/PlanContext.java | 106 +++++++++++++++++++++ .../apache/doris/nereids/cost/CostCalculator.java | 102 ++++++++++++++++++++ .../apache/doris/nereids/cost/CostEstimate.java | 74 ++++++++++++++ .../doris/nereids/operators/AbstractOperator.java | 13 ++- .../apache/doris/nereids/operators/Operator.java | 4 + .../doris/nereids/operators/plans/JoinType.java | 4 + .../doris/nereids/trees/expressions/ExprId.java | 2 +- .../doris/nereids/trees/expressions/Slot.java | 7 +- .../trees/plans/PhysicalPlanTranslator.java | 2 +- .../apache/doris/statistics/StatsDeriveResult.java | 32 ++++++- 11 files changed, 399 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/OperatorVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/OperatorVisitor.java new file mode 100644 index 0000000000..7b168355ca --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/OperatorVisitor.java @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids; + +import org.apache.doris.nereids.operators.Operator; +import org.apache.doris.nereids.operators.plans.physical.PhysicalAggregation; +import org.apache.doris.nereids.operators.plans.physical.PhysicalFilter; +import org.apache.doris.nereids.operators.plans.physical.PhysicalHashJoin; +import org.apache.doris.nereids.operators.plans.physical.PhysicalOlapScan; +import org.apache.doris.nereids.operators.plans.physical.PhysicalProject; +import org.apache.doris.nereids.operators.plans.physical.PhysicalSort; + +/** + * Base class for the processing of logical and physical operator. + * + * @param <R> Return type of each visit method. + * @param <C> Context type. + */ +public abstract class OperatorVisitor<R, C> { + /* Operator */ + + public abstract R visitOperator(Operator operator, C context); + + public R visitPhysicalAggregation(PhysicalAggregation physicalAggregation, C context) { + return null; + } + + public R visitPhysicalOlapScan(PhysicalOlapScan physicalOlapScan, C context) { + return null; + } + + public R visitPhysicalSort(PhysicalSort physicalSort, C context) { + return null; + } + + public R visitPhysicalHashJoin(PhysicalHashJoin physicalHashJoin, C context) { + return null; + } + + public R visitPhysicalProject(PhysicalProject physicalProject, C context) { + return null; + } + + public R visitPhysicalFilter(PhysicalFilter physicalFilter, C context) { + return null; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java new file mode 100644 index 0000000000..50ead51f0e --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +package org.apache.doris.nereids; + +import org.apache.doris.common.Id; +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.statistics.StatsDeriveResult; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +import java.util.List; + +/** + * Context for plan. + * Abstraction for group expressions and stand-alone expressions/DAGs. + * A ExpressionHandle is attached to either {@link Plan} or {@link GroupExpression}. + * Inspired by GPORCA-CExpressionHandle. + */ +public class PlanContext { + // array of children's derived stats + private final List<StatsDeriveResult> childrenStats = Lists.newArrayList(); + // statistics of attached plan/gexpr + private StatsDeriveResult statistics; + // attached plan + private Plan plan; + // attached group expression + private GroupExpression groupExpression; + + public PlanContext(Plan plan) { + this.plan = plan; + } + + public PlanContext(GroupExpression groupExpression) { + this.groupExpression = groupExpression; + } + + public Plan getPlan() { + return plan; + } + + public GroupExpression getGroupExpression() { + return groupExpression; + } + + public List<StatsDeriveResult> getChildrenStats() { + return childrenStats; + } + + public StatsDeriveResult getStatistics() { + return statistics; + } + + public void setStatistics(StatsDeriveResult stats) { + this.statistics = stats; + } + + public StatsDeriveResult getStatisticsWithCheck() { + Preconditions.checkNotNull(statistics); + return statistics; + } + + public LogicalProperties childLogicalPropertyAt(int index) { + return plan.child(index).getLogicalProperties(); + } + + public List<Slot> getChildOutputSlots(int index) { + return childLogicalPropertyAt(index).getOutput(); + } + + public List<Id> getChildOutputIds(int index) { + List<Id> ids = Lists.newArrayList(); + childLogicalPropertyAt(index).getOutput().forEach(slot -> { + ids.add(slot.getId()); + }); + return ids; + } + + /** + * Get child statistics. + */ + public StatsDeriveResult getChildStatistics(int index) { + StatsDeriveResult statistics = childrenStats.get(index); + Preconditions.checkNotNull(statistics); + return statistics; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java new file mode 100644 index 0000000000..90cc2f8751 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostCalculator.java @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.cost; + +import org.apache.doris.common.Id; +import org.apache.doris.nereids.OperatorVisitor; +import org.apache.doris.nereids.PlanContext; +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.operators.Operator; +import org.apache.doris.nereids.operators.plans.physical.PhysicalAggregation; +import org.apache.doris.nereids.operators.plans.physical.PhysicalHashJoin; +import org.apache.doris.nereids.operators.plans.physical.PhysicalOlapScan; +import org.apache.doris.nereids.operators.plans.physical.PhysicalProject; +import org.apache.doris.statistics.StatsDeriveResult; + +import com.google.common.base.Preconditions; + +import java.util.List; + +/** + * Calculate the cost of a plan. + * Inspired by Presto. + */ +public class CostCalculator { + /** + * Constructor. + */ + public double calculateCost(GroupExpression groupExpression) { + PlanContext planContext = new PlanContext(groupExpression); + CostEstimator costCalculator = new CostEstimator(); + CostEstimate costEstimate = groupExpression.getOperator().accept(costCalculator, planContext); + return costFormula(costEstimate); + } + + private double costFormula(CostEstimate costEstimate) { + double cpuCostWeight = 1; + double memoryCostWeight = 1; + double networkCostWeight = 1; + return costEstimate.getCpuCost() * cpuCostWeight + costEstimate.getMemoryCost() * memoryCostWeight + + costEstimate.getNetworkCost() * networkCostWeight; + } + + private static class CostEstimator extends OperatorVisitor<CostEstimate, PlanContext> { + @Override + public CostEstimate visitOperator(Operator operator, PlanContext context) { + return CostEstimate.zero(); + } + + @Override + public CostEstimate visitPhysicalAggregation(PhysicalAggregation physicalAggregation, PlanContext context) { + StatsDeriveResult statistics = context.getStatisticsWithCheck(); + return CostEstimate.ofCpu(statistics.computeSize()); + } + + @Override + public CostEstimate visitPhysicalOlapScan(PhysicalOlapScan physicalOlapScan, PlanContext context) { + StatsDeriveResult statistics = context.getStatisticsWithCheck(); + return CostEstimate.ofCpu(statistics.computeSize()); + } + + @Override + public CostEstimate visitPhysicalHashJoin(PhysicalHashJoin physicalHashJoin, PlanContext context) { + Preconditions.checkState(context.getGroupExpression().arity() == 2); + Preconditions.checkState(context.getChildrenStats().size() == 2); + + StatsDeriveResult leftStatistics = context.getChildStatistics(0); + StatsDeriveResult rightStatistics = context.getChildStatistics(1); + + // TODO: handle some case + + List<Id> leftIds = context.getChildOutputIds(0); + List<Id> rightIds = context.getChildOutputIds(1); + + // handle cross join, onClause is empty ..... + + return new CostEstimate( + leftStatistics.computeColumnSize(leftIds) + rightStatistics.computeColumnSize(rightIds), + rightStatistics.computeColumnSize(rightIds), 0); + } + + @Override + public CostEstimate visitPhysicalProject(PhysicalProject physicalProject, PlanContext context) { + StatsDeriveResult statistics = context.getStatisticsWithCheck(); + return CostEstimate.ofCpu(statistics.computeSize()); + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostEstimate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostEstimate.java new file mode 100644 index 0000000000..af2002e4cb --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostEstimate.java @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.cost; + +import com.google.common.base.Preconditions; + +/** + * Use for estimating the cost of plan. + */ +public final class CostEstimate { + private static final CostEstimate INFINITE = + new CostEstimate(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY); + private static final CostEstimate ZERO = new CostEstimate(0, 0, 0); + + private final double cpuCost; + private final double memoryCost; + private final double networkCost; + + /** + * Constructor of CostEstimate. + */ + public CostEstimate(double cpuCost, double memoryCost, double networkCost) { + Preconditions.checkArgument(!(cpuCost < 0), "cpuCost cannot be negative: %s", cpuCost); + Preconditions.checkArgument(!(memoryCost < 0), "memoryCost cannot be negative: %s", memoryCost); + Preconditions.checkArgument(!(networkCost < 0), "networkCost cannot be negative: %s", networkCost); + this.cpuCost = cpuCost; + this.memoryCost = memoryCost; + this.networkCost = networkCost; + } + + public static CostEstimate infinite() { + return INFINITE; + } + + public static CostEstimate zero() { + return ZERO; + } + + public double getCpuCost() { + return cpuCost; + } + + public double getMemoryCost() { + return memoryCost; + } + + public double getNetworkCost() { + return networkCost; + } + + public static CostEstimate ofCpu(double cpuCost) { + return new CostEstimate(cpuCost, 0, 0); + } + + public static CostEstimate ofMemory(double memoryCost) { + return new CostEstimate(0, memoryCost, 0); + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/AbstractOperator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/AbstractOperator.java index ca1b5f3ed7..d3b7a25e92 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/AbstractOperator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/AbstractOperator.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.operators; +import org.apache.doris.nereids.OperatorVisitor; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanOperatorVisitor; @@ -48,14 +49,22 @@ public abstract class AbstractOperator implements Operator { * Child operator should overwrite this method. * for example: * <code> - * visitor.visitPhysicalOlapScanPlan( - * (PhysicalPlan<? extends PhysicalPlan, PhysicalOlapScan>) plan, context); + * visitor.visitPhysicalOlapScanPlan( + * (PhysicalPlan<? extends PhysicalPlan, PhysicalOlapScan>) plan, context); * </code> */ public <R, C> R accept(PlanOperatorVisitor<R, C> visitor, Plan plan, C context) { return null; } + public <R, C> R accept(OperatorVisitor<R, C> visitor, C context) { + return visitor.visitOperator(this, context); + } + + public <R, C> R accept(OperatorVisitor<R, C> visitor, Operator operator, C context) { + return null; + } + public long getLimited() { return limited; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/Operator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/Operator.java index 914a1720b4..78524674af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/Operator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/Operator.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.operators; +import org.apache.doris.nereids.OperatorVisitor; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.trees.TreeNode; import org.apache.doris.nereids.trees.plans.Plan; @@ -32,4 +33,7 @@ public interface Operator { <R, C> R accept(PlanOperatorVisitor<R, C> visitor, Plan plan, C context); + <R, C> R accept(OperatorVisitor<R, C> visitor, Operator operator, C context); + + <R, C> R accept(OperatorVisitor<R, C> visitor, C context); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/plans/JoinType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/plans/JoinType.java index 9e2b8d41e3..c4b4bcfd31 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/plans/JoinType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/operators/plans/JoinType.java @@ -84,6 +84,10 @@ public enum JoinType { } } + public final boolean isCrossJoin() { + return this == CROSS_JOIN; + } + public final boolean isInnerOrOuterOrCrossJoin() { return this == INNER_JOIN || this == CROSS_JOIN || this == FULL_OUTER_JOIN; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ExprId.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ExprId.java index c900ea42a7..6ed30f678f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ExprId.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/ExprId.java @@ -44,7 +44,7 @@ public class ExprId extends Id<ExprId> { } /** - * Should be only called by {@link org.apache.doris.nereids.trees.expressions.NamedExpressionUtil}. + * Should be only called by {@link org.apache.doris.nereids.trees.expressions.NamedExpressionUtil}. */ public static IdGenerator<ExprId> createGenerator() { return new IdGenerator<ExprId>() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Slot.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Slot.java index c235dd19e7..78f1ba3491 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Slot.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Slot.java @@ -23,10 +23,9 @@ import org.apache.doris.nereids.trees.NodeType; * Abstract class for all slot in expression. */ public abstract class Slot extends NamedExpression implements LeafExpression { + private ExprId id; - private int id; - - public Slot(NodeType type, int id, Expression... children) { + public Slot(NodeType type, ExprId id, Expression... children) { super(type, children); this.id = id; } @@ -40,7 +39,7 @@ public abstract class Slot extends NamedExpression implements LeafExpression { return this; } - public int getId() { + public ExprId getId() { return id; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PhysicalPlanTranslator.java index 9b9e3c5798..4544ea7bd7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PhysicalPlanTranslator.java @@ -277,7 +277,7 @@ public class PhysicalPlanTranslator extends PlanOperatorVisitor<PlanFragment, Pl tupleDescriptor.setTable(table); for (Slot slot : slotList) { SlotReference slotReference = (SlotReference) slot; - SlotDescriptor slotDescriptor = context.addSlotDesc(tupleDescriptor, slot.getId()); + SlotDescriptor slotDescriptor = context.addSlotDesc(tupleDescriptor, slot.getId().asInt()); slotDescriptor.setColumn(slotReference.getColumn()); slotDescriptor.setType(slotReference.getDataType().toCatalogDataType()); slotDescriptor.setIsMaterialized(true); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java index 1888b3dd45..4f2abb5691 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java @@ -21,9 +21,13 @@ import org.apache.doris.common.Id; import com.google.common.collect.Maps; +import java.util.List; import java.util.Map; +import java.util.Map.Entry; -// This structure is maintained in each operator to store the statistical information results obtained by the operator. +/** + * This structure is maintained in each operator to store the statistical information results obtained by the operator. + */ public class StatsDeriveResult { private long rowCount = -1; // The data size of the corresponding column in the operator @@ -39,6 +43,32 @@ public class StatsDeriveResult { this.columnToNdv.putAll(columnToNdv); } + public float computeSize() { + return Math.max(1, columnToDataSize.values().stream().reduce((float) 0, Float::sum)) * rowCount; + } + + /** + * Compute the data size of all input columns. + * + * @param slotIds all input columns. + * @return sum data size. + */ + public float computeColumnSize(List<Id> slotIds) { + float count = 0; + boolean exist = false; + + for (Entry<Id, Float> entry : columnToDataSize.entrySet()) { + if (slotIds.contains(entry.getKey())) { + count += entry.getValue(); + exist = true; + } + } + if (!exist) { + return 1; + } + return count * rowCount; + } + public void setRowCount(long rowCount) { this.rowCount = rowCount; } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org