This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 9ef09b8354  [feature](statistics) Statistics derivation.Step 1:ScanNode implement… (#8947)
9ef09b8354 is described below

commit 9ef09b8354cbd499512efb4df4a5e9c8eba1b1af
Author: zhengshiJ <32082872+zhengs...@users.noreply.github.com>
AuthorDate: Fri Apr 29 10:41:12 2022 +0800

    [feature](statistics) Statistics derivation.Step 1:ScanNode implement… (#8947)

    * [feature](statistics) Statistics derivation.Step 1:ScanNode implementation

    Co-authored-by: jianghaochen <jianghaoc...@meituan.com>
---
 .../org/apache/doris/planner/BrokerScanNode.java   |   9 +-
 .../java/org/apache/doris/planner/EsScanNode.java  |   2 +-
 .../org/apache/doris/planner/HiveScanNode.java     |   2 +-
 .../org/apache/doris/planner/IcebergScanNode.java  |   2 +-
 .../org/apache/doris/planner/LoadScanNode.java     |   6 +-
 .../org/apache/doris/planner/MysqlScanNode.java    |   2 +-
 .../org/apache/doris/planner/OdbcScanNode.java     |   2 +-
 .../org/apache/doris/planner/OlapScanNode.java     |  53 +++----
 .../java/org/apache/doris/planner/PlanNode.java    |  33 +++++
 .../java/org/apache/doris/planner/ScanNode.java    |   3 +-
 .../org/apache/doris/planner/SchemaScanNode.java   |   2 +-
 .../apache/doris/planner/SingleNodePlanner.java    |   1 -
 .../java/org/apache/doris/qe/StmtExecutor.java     |   8 -
 .../apache/doris/statistics/BaseStatsDerive.java   | 161 +++++++++++++++++++++
 .../org/apache/doris/statistics/DeriveFactory.java |  36 +++++
 .../doris/statistics/OlapScanStatsDerive.java      | 112 ++++++++++++++
 .../org/apache/doris/statistics/Statistics.java    |  13 ++
 .../apache/doris/statistics/StatisticsManager.java |   4 +
 .../apache/doris/statistics/StatsDeriveResult.java |  56 +++++++
 .../doris/statistics/StatsRecursiveDerive.java     |  56 +++++++
 .../org/apache/doris/statistics/TableStats.java    |  12 ++
 .../org/apache/doris/analysis/ExplainTest.java     |  13 +-
 .../org/apache/doris/planner/QueryPlanTest.java    |   7 +-
 23 files changed, 545 insertions(+), 50 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java
index 7338d9bbb0..73aa1fb04d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/BrokerScanNode.java
@@ -138,7 +138,14 @@ public class BrokerScanNode extends LoadScanNode {
     public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
                           List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) {
-        super(id, destTupleDesc, planNodeName);
+        super(id, destTupleDesc, planNodeName, NodeType.BROKER_SCAN_NODE);
+        this.fileStatusesList = fileStatusesList;
+        this.filesAdded = filesAdded;
+    }
+
+    public BrokerScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName,
+                          List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded, NodeType nodeType) {
+        super(id, destTupleDesc, planNodeName, nodeType);
         this.fileStatusesList = fileStatusesList;
         this.filesAdded = filesAdded;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
index 2ed1a4bde8..96dbdbe934 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/EsScanNode.java
@@ -72,7 +72,7 @@ public class EsScanNode extends ScanNode {
     boolean isFinalized = false;
     public EsScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
-        super(id, desc, planNodeName);
+
super(id, desc, planNodeName, NodeType.ES_SCAN_NODE); table = (EsTable) (desc.getTable()); esTablePartitions = table.getEsTablePartitions(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java index 76cf534313..c18533d302 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveScanNode.java @@ -100,7 +100,7 @@ public class HiveScanNode extends BrokerScanNode { public HiveScanNode(PlanNodeId id, TupleDescriptor destTupleDesc, String planNodeName, List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) { - super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded); + super(id, destTupleDesc, planNodeName, fileStatusesList, filesAdded, NodeType.HIVE_SCAN_NODE); this.hiveTable = (HiveTable) destTupleDesc.getTable(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java index 5428c9ee55..5c7dd1fad0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergScanNode.java @@ -47,7 +47,7 @@ public class IcebergScanNode extends BrokerScanNode { public IcebergScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded) { - super(id, desc, planNodeName, fileStatusesList, filesAdded); + super(id, desc, planNodeName, fileStatusesList, filesAdded, NodeType.ICEBREG_SCAN_NODE); icebergTable = (IcebergTable) desc.getTable(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java index b6dbb4f782..82299e2ea6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java @@ -54,7 +54,11 @@ public abstract class LoadScanNode extends ScanNode { protected LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND; public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { - super(id, desc, planNodeName); + super(id, desc, planNodeName, NodeType.LOAD_SCAN_NODE); + } + + public LoadScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) { + super(id, desc, planNodeName, nodeType); } protected void initAndSetWhereExpr(Expr whereExpr, TupleDescriptor tupleDesc, Analyzer analyzer) throws UserException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java index fcc22125c9..9235b1fc75 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/MysqlScanNode.java @@ -56,7 +56,7 @@ public class MysqlScanNode extends ScanNode { * Constructs node to scan given data files of table 'tbl'. 
*/ public MysqlScanNode(PlanNodeId id, TupleDescriptor desc, MysqlTable tbl) { - super(id, desc, "SCAN MYSQL"); + super(id, desc, "SCAN MYSQL", NodeType.MYSQL_SCAN_NODE); tblName = "`" + tbl.getMysqlTableName() + "`"; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java index 90f989d35c..1f32b9e938 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OdbcScanNode.java @@ -73,7 +73,7 @@ public class OdbcScanNode extends ScanNode { * Constructs node to scan given data files of table 'tbl'. */ public OdbcScanNode(PlanNodeId id, TupleDescriptor desc, OdbcTable tbl) { - super(id, desc, "SCAN ODBC"); + super(id, desc, "SCAN ODBC", NodeType.ODBC_SCAN_NODE); connectString = tbl.getConnectString(); odbcType = tbl.getOdbcTableType(); tblName = OdbcTable.databaseProperName(odbcType, tbl.getOdbcTableName()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index f74666a18c..6a1fc49b03 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -53,6 +53,7 @@ import org.apache.doris.common.util.Util; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; import org.apache.doris.resource.Tag; +import org.apache.doris.statistics.StatsRecursiveDerive; import org.apache.doris.system.Backend; import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TNetworkAddress; @@ -146,7 +147,7 @@ public class OlapScanNode extends ScanNode { // Constructs node to scan given data files of table 'tbl'. public OlapScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { - super(id, desc, planNodeName); + super(id, desc, planNodeName, NodeType.OLAP_SCAN_NODE); olapTable = (OlapTable) desc.getTable(); } @@ -346,10 +347,25 @@ public class OlapScanNode extends ScanNode { * - So only an inaccurate cardinality can be calculated here. */ if (analyzer.safeIsEnableJoinReorderBasedCost()) { + mockRowCountInStatistic(); computeInaccurateCardinality(); } } + /** + * Remove the method after statistics collection is working properly + */ + public void mockRowCountInStatistic() { + long tableId = desc.getTable().getId(); + cardinality = 0; + for (long selectedPartitionId : selectedPartitionIds) { + final Partition partition = olapTable.getPartition(selectedPartitionId); + final MaterializedIndex baseIndex = partition.getBaseIndex(); + cardinality += baseIndex.getRowCount(); + } + Catalog.getCurrentCatalog().getStatisticsManager().getStatistics().mockTableStatsWithRowCount(tableId, cardinality); + } + @Override public void finalize(Analyzer analyzer) throws UserException { LOG.debug("OlapScanNode get scan range locations. Tuple: {}", desc); @@ -386,6 +402,12 @@ public class OlapScanNode extends ScanNode { } // when node scan has no data, cardinality should be 0 instead of a invalid value after computeStats() cardinality = cardinality == -1 ? 0 : cardinality; + + // update statsDeriveResult for real statistics + // After statistics collection is complete, remove the logic + if (analyzer.safeIsEnableJoinReorderBasedCost()) { + statsDeriveResult.setRowCount(cardinality); + } } @Override @@ -397,30 +419,9 @@ public class OlapScanNode extends ScanNode { numNodes = numNodes <= 0 ? 
1 : numNodes; } - /** - * Calculate inaccurate cardinality. - * cardinality: the value of cardinality is the sum of rowcount which belongs to selectedPartitionIds - * The cardinality here is actually inaccurate, it will be greater than the actual value. - * There are two reasons - * 1. During the actual execution, not all tablets belonging to the selected partition will be scanned. - * Some tablets may have been pruned before execution. - * 2. The base index may eventually be replaced by mv index. - * <p> - * There are three steps to calculate cardinality - * 1. Calculate how many rows were scanned - * 2. Apply conjunct - * 3. Apply limit - */ - private void computeInaccurateCardinality() { - // step1: Calculate how many rows were scanned - cardinality = 0; - for (long selectedPartitionId : selectedPartitionIds) { - final Partition partition = olapTable.getPartition(selectedPartitionId); - final MaterializedIndex baseIndex = partition.getBaseIndex(); - cardinality += baseIndex.getRowCount(); - } - applyConjunctsSelectivity(); - capCardinalityAtLimit(); + private void computeInaccurateCardinality() throws UserException { + StatsRecursiveDerive.getStatsRecursiveDerive().statsRecursiveDerive(this); + cardinality = statsDeriveResult.getRowCount(); } private Collection<Long> partitionPrune(PartitionInfo partitionInfo, PartitionNames partitionNames) throws AnalysisException { @@ -563,7 +564,7 @@ public class OlapScanNode extends ScanNode { result.add(scanRangeLocations); } - // FIXME(dhc): we use cardinality here to simulate ndv + if (tablets.size() == 0) { desc.setCardinality(0); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java index 58a0019962..13ac58c40f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java @@ -36,6 +36,7 @@ import org.apache.doris.common.NotImplementedException; import org.apache.doris.common.TreeNode; import org.apache.doris.common.UserException; import org.apache.doris.common.util.VectorizedUtil; +import org.apache.doris.statistics.StatsDeriveResult; import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TFunctionBinaryType; import org.apache.doris.thrift.TPlan; @@ -135,6 +136,9 @@ abstract public class PlanNode extends TreeNode<PlanNode> { protected List<SlotId> outputSlotIds; + protected NodeType nodeType = NodeType.DEFAULT; + protected StatsDeriveResult statsDeriveResult; + protected PlanNode(PlanNodeId id, ArrayList<TupleId> tupleIds, String planNodeName) { this.id = id; this.limit = -1; @@ -173,12 +177,41 @@ abstract public class PlanNode extends TreeNode<PlanNode> { this.planNodeName = VectorizedUtil.isVectorized() ? "V" + planNodeName : planNodeName; this.numInstances = 1; + this.nodeType = nodeType; + } + + public enum NodeType { + DEFAULT, + AGG_NODE, + BROKER_SCAN_NODE, + HASH_JOIN_NODE, + HIVE_SCAN_NODE, + MERGE_NODE, + ES_SCAN_NODE, + ICEBREG_SCAN_NODE, + LOAD_SCAN_NODE, + MYSQL_SCAN_NODE, + ODBC_SCAN_NODE, + OLAP_SCAN_NODE, + SCHEMA_SCAN_NODE, } public String getPlanNodeName() { return planNodeName; } + public StatsDeriveResult getStatsDeriveResult() { + return statsDeriveResult; + } + + public NodeType getNodeType() { + return nodeType; + } + + public void setStatsDeriveResult(StatsDeriveResult statsDeriveResult) { + this.statsDeriveResult = statsDeriveResult; + } + /** * Sets tblRefIds_, tupleIds_, and nullableTupleIds_. 
* The default implementation is a no-op. diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java index 8b8c52b5bc..a891f7616f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java @@ -65,8 +65,9 @@ abstract public class ScanNode extends PlanNode { protected String sortColumn = null; protected Analyzer analyzer; - public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { + public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, NodeType nodeType) { super(id, desc.getId().asList(), planNodeName); + super.nodeType = nodeType; this.desc = desc; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java index c5692c38e8..07152bfd5e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SchemaScanNode.java @@ -57,7 +57,7 @@ public class SchemaScanNode extends ScanNode { * Constructs node to scan given data files of table 'tbl'. */ public SchemaScanNode(PlanNodeId id, TupleDescriptor desc) { - super(id, desc, "SCAN SCHEMA"); + super(id, desc, "SCAN SCHEMA", NodeType.SCHEMA_SCAN_NODE); this.tableName = desc.getTable().getName(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index 8ad921bf09..a78435a3dd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -1725,7 +1725,6 @@ public class SingleNodePlanner { scanNodeList.add(scanNode); scanNode.init(analyzer); - return scanNode; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index e913f9cc96..7195591a29 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -699,14 +699,6 @@ public class StmtExecutor implements ProfileWriter { } if (explainOptions != null) parsedStmt.setIsExplain(explainOptions); } - - if (parsedStmt instanceof InsertStmt && parsedStmt.isExplain()) { - if (ConnectContext.get() != null && - ConnectContext.get().getExecutor() != null && - ConnectContext.get().getExecutor().getParsedStmt() != null) { - ConnectContext.get().getExecutor().getParsedStmt().setIsExplain(new ExplainOptions(true, false)); - } - } } plannerProfile.setQueryAnalysisFinishTime(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java new file mode 100644 index 0000000000..ccb58e9287 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseStatsDerive.java @@ -0,0 +1,161 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.SlotId; +import org.apache.doris.common.UserException; +import org.apache.doris.planner.PlanNode; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +public class BaseStatsDerive { + private static final Logger LOG = LogManager.getLogger(BaseStatsDerive.class); + // estimate of the output rowCount of this node; + // invalid: -1 + protected long rowCount = -1; + protected long limit = -1; + + protected List<Expr> conjuncts = Lists.newArrayList(); + protected List<StatsDeriveResult> childrenStatsResult = Lists.newArrayList(); + + protected void init(PlanNode node) throws UserException { + limit = node.getLimit(); + conjuncts.addAll(node.getConjuncts()); + + for (PlanNode childNode : node.getChildren()) { + StatsDeriveResult result = childNode.getStatsDeriveResult(); + if (result == null) { + throw new UserException("childNode statsDeriveResult is null, childNodeType is " + childNode.getNodeType() + + "parentNodeType is " + node.getNodeType()); + } + childrenStatsResult.add(result); + } + } + + public StatsDeriveResult deriveStats() { + return new StatsDeriveResult(deriveRowCount(), deriveColumnToDataSize(), deriveColumnToNdv()); + } + + public boolean hasLimit() { + return limit > -1; + } + + protected void applyConjunctsSelectivity() { + if (rowCount == -1) { + return; + } + applySelectivity(); + } + + private void applySelectivity() { + double selectivity = computeSelectivity(); + Preconditions.checkState(rowCount >= 0); + long preConjunctrowCount = rowCount; + rowCount = Math.round(rowCount * selectivity); + // don't round rowCount down to zero for safety. + if (rowCount == 0 && preConjunctrowCount > 0) { + rowCount = 1; + } + } + + protected double computeSelectivity() { + for (Expr expr : conjuncts) { + expr.setSelectivity(); + } + return computeCombinedSelectivity(conjuncts); + } + + /** + * Returns the estimated combined selectivity of all conjuncts. Uses heuristics to + * address the following estimation challenges: + * 1. The individual selectivities of conjuncts may be unknown. + * 2. Two selectivities, whether known or unknown, could be correlated. Assuming + * independence can lead to significant underestimation. + * <p> + * The first issue is addressed by using a single default selectivity that is + * representative of all conjuncts with unknown selectivities. + * The second issue is addressed by an exponential backoff when multiplying each + * additional selectivity into the final result. + */ + protected double computeCombinedSelectivity(List<Expr> conjuncts) { + // Collect all estimated selectivities. 
+ List<Double> selectivities = new ArrayList<>(); + for (Expr e : conjuncts) { + if (e.hasSelectivity()) selectivities.add(e.getSelectivity()); + } + if (selectivities.size() != conjuncts.size()) { + // Some conjuncts have no estimated selectivity. Use a single default + // representative selectivity for all those conjuncts. + selectivities.add(Expr.DEFAULT_SELECTIVITY); + } + // Sort the selectivities to get a consistent estimate, regardless of the original + // conjunct order. Sort in ascending order such that the most selective conjunct + // is fully applied. + Collections.sort(selectivities); + double result = 1.0; + // selectivity = 1 * (s1)^(1/1) * (s2)^(1/2) * ... * (sn-1)^(1/(n-1)) * (sn)^(1/n) + for (int i = 0; i < selectivities.size(); ++i) { + // Exponential backoff for each selectivity multiplied into the final result. + result *= Math.pow(selectivities.get(i), 1.0 / (double) (i + 1)); + } + // Bound result in [0, 1] + return Math.max(0.0, Math.min(1.0, result)); + } + + protected void capRowCountAtLimit() { + if (hasLimit()) { + rowCount = rowCount == -1 ? limit : Math.min(rowCount, limit); + } + } + + + // Currently it simply adds the number of rows of children + protected long deriveRowCount() { + for (StatsDeriveResult statsDeriveResult : childrenStatsResult) { + rowCount = Math.max(rowCount, statsDeriveResult.getRowCount()); + } + applyConjunctsSelectivity(); + capRowCountAtLimit(); + return rowCount; + } + + + protected HashMap<SlotId, Float> deriveColumnToDataSize() { + HashMap<SlotId, Float> columnToDataSize = new HashMap<>(); + for (StatsDeriveResult child : childrenStatsResult) { + columnToDataSize.putAll(child.getColumnToDataSize()); + } + return columnToDataSize; + } + + protected HashMap<SlotId, Long> deriveColumnToNdv() { + HashMap<SlotId, Long> columnToNdv = new HashMap<>(); + for (StatsDeriveResult child : childrenStatsResult) { + columnToNdv.putAll(child.getColumnToNdv()); + } + return columnToNdv; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java new file mode 100644 index 0000000000..d663bf5e08 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.statistics; + +import org.apache.doris.planner.PlanNode; + +public class DeriveFactory { + + public BaseStatsDerive getStatsDerive(PlanNode.NodeType nodeType) { + switch (nodeType) { + case AGG_NODE: + case HASH_JOIN_NODE: + case MERGE_NODE: + break; + case OLAP_SCAN_NODE: + return new OlapScanStatsDerive(); + case DEFAULT: + } + return new BaseStatsDerive(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java new file mode 100644 index 0000000000..ff514aa55e --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapScanStatsDerive.java @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import com.google.common.base.Preconditions; +import org.apache.doris.analysis.SlotDescriptor; +import org.apache.doris.analysis.SlotId; +import org.apache.doris.catalog.Catalog; +import org.apache.doris.common.Pair; +import org.apache.doris.common.UserException; +import org.apache.doris.planner.OlapScanNode; +import org.apache.doris.planner.PlanNode; + +import java.util.HashMap; +import java.util.Map; + +public class OlapScanStatsDerive extends BaseStatsDerive { + // Currently, due to the structure of doris, + // the selected materialized view is not determined when calculating the statistical information of scan, + // so baseIndex is used for calculation when generating Planner. + + // The rowCount here is the number of rows. + private long inputRowCount = -1; + private Map<SlotId, Float> slotIdToDataSize; + private Map<SlotId, Long> slotIdToNdv; + private Map<SlotId, Pair<Long, String>> slotIdToTableIdAndColumnName; + + @Override + public void init(PlanNode node) throws UserException { + Preconditions.checkState(node instanceof OlapScanNode); + super.init(node); + buildStructure((OlapScanNode)node); + } + + @Override + public StatsDeriveResult deriveStats() { + /** + * Compute InAccurate cardinality before mv selector and tablet pruning. + * - Accurate statistical information relies on the selector of materialized views and bucket reduction. + * - However, Those both processes occur after the reorder algorithm is completed. + * - When Join reorder is turned on, the cardinality must be calculated before the reorder algorithm. + * - So only an inaccurate cardinality can be calculated here. 
+ */ + rowCount = inputRowCount; + for (Map.Entry<SlotId, Pair<Long, String>> pairEntry : slotIdToTableIdAndColumnName.entrySet()) { + Pair<Long, Float> ndvAndDataSize = getNdvAndDataSizeFromStatistics(pairEntry.getValue()); + long ndv = ndvAndDataSize.first; + float dataSize = ndvAndDataSize.second; + slotIdToNdv.put(pairEntry.getKey(), ndv); + slotIdToDataSize.put(pairEntry.getKey(), dataSize); + } + return new StatsDeriveResult(deriveRowCount(), slotIdToDataSize, slotIdToNdv); + } + + public void buildStructure(OlapScanNode node) { + slotIdToDataSize = new HashMap<>(); + slotIdToNdv = new HashMap<>(); + if (node.getTupleDesc() != null + && node.getTupleDesc().getTable() != null) { + long tableId = node.getTupleDesc().getTable().getId(); + inputRowCount = Catalog.getCurrentCatalog().getStatisticsManager() + .getStatistics().getTableStats(tableId).getRowCount(); + } + for (SlotDescriptor slot : node.getTupleDesc().getSlots()) { + if (!slot.isMaterialized()) { + continue; + } + + long tableId = slot.getParent().getTable().getId(); + String columnName = slot.getColumn().getName(); + slotIdToTableIdAndColumnName.put(slot.getId(), new Pair<>(tableId, columnName)); + } + } + + //TODO:Implement the getStatistics interface + //now there is nothing in statistics, need to wait for collection finished + public Pair<Long, Float> getNdvAndDataSizeFromStatistics(Pair<Long, String> pair) { + long ndv = -1; + float dataSize = -1; + /* + if (Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(pair.first) != null) { + ndv = Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(pair.first).get(pair.second).getNdv(); + dataSize = Catalog.getCurrentCatalog() + .getStatisticsManager() + .getStatistics() + .getColumnStats(pair.first).get(pair.second).getDataSize(); + } + */ + return new Pair<>(ndv, dataSize); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index b17a37858d..58003de90c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -70,4 +70,17 @@ public class Statistics { } return tableStats.getNameToColumnStats(); } + + // TODO: mock statistics need to be removed in the future + public void mockTableStatsWithRowCount(long tableId, long rowCount) { + TableStats tableStats = idToTableStats.get(tableId); + if (tableStats == null) { + tableStats = new TableStats(); + idToTableStats.put(tableId, tableStats); + } + + if (tableStats.getRowCount() != rowCount) { + tableStats.setRowCount(rowCount); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java index e994cce212..b1c88e36a5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsManager.java @@ -136,4 +136,8 @@ public class StatisticsManager { Table table = db.getTableOrAnalysisException(tableName); return table; } + + public Statistics getStatistics() { + return statistics; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java new file mode 100644 index 0000000000..b9b0d8024e --- /dev/null +++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsDeriveResult.java @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import com.google.common.collect.Maps; +import org.apache.doris.analysis.SlotId; + +import java.util.Map; + +// This structure is maintained in each operator to store the statistical information results obtained by the operator. +public class StatsDeriveResult { + private long rowCount = -1; + // The data size of the corresponding column in the operator + // The actual key is slotId + private final Map<SlotId, Float> columnToDataSize = Maps.newHashMap(); + // The ndv of the corresponding column in the operator + // The actual key is slotId + private final Map<SlotId, Long> columnToNdv = Maps.newHashMap(); + + public StatsDeriveResult(long rowCount, Map<SlotId, Float> columnToDataSize, Map<SlotId, Long> columnToNdv) { + this.rowCount = rowCount; + this.columnToDataSize.putAll(columnToDataSize); + this.columnToNdv.putAll(columnToNdv); + } + + public void setRowCount(long rowCount) { + this.rowCount = rowCount; + } + + public long getRowCount() { + return rowCount; + } + + public Map<SlotId, Long> getColumnToNdv() { + return columnToNdv; + } + + public Map<SlotId, Float> getColumnToDataSize() { + return columnToDataSize; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java new file mode 100644 index 0000000000..e6159a594d --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsRecursiveDerive.java @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.statistics; + +import org.apache.doris.common.UserException; +import org.apache.doris.planner.PlanNode; + + +public class StatsRecursiveDerive { + private StatsRecursiveDerive() {} + + public static StatsRecursiveDerive getStatsRecursiveDerive() { + return Inner.INSTANCE; + } + + private static class Inner { + private static final StatsRecursiveDerive INSTANCE = new StatsRecursiveDerive(); + } + + /** + * Recursively complete the derivation of statistics for this node and all its children + * @param node + * This parameter is an input and output parameter, + * which will store the derivation result of statistical information in the corresponding node + */ + public void statsRecursiveDerive(PlanNode node) throws UserException { + if (node.getStatsDeriveResult() != null) { + return; + } + for (PlanNode childNode : node.getChildren()) { + if (childNode.getStatsDeriveResult() == null) { + statsRecursiveDerive(childNode); + } + } + DeriveFactory deriveFactory = new DeriveFactory(); + BaseStatsDerive deriveStats = deriveFactory.getStatsDerive(node.getNodeType()); + deriveStats.init(node); + StatsDeriveResult result = deriveStats.deriveStats(); + node.setStatsDeriveResult(result); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java index efb35bbe10..ef494bd9f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStats.java @@ -94,4 +94,16 @@ public class TableStats { public Map<String, ColumnStats> getNameToColumnStats() { return nameToColumnStats; } + + public long getRowCount() { + return rowCount; + } + + public long getDataSize() { + return dataSize; + } + + public void setRowCount(long rowCount) { + this.rowCount = rowCount; + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java index 3fca5643a5..f94e69b142 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/ExplainTest.java @@ -72,20 +72,27 @@ public class ExplainTest { Assert.assertEquals(dropDbStmt.toSql(), dropSchemaStmt.toSql()); } - public void testExplainSelect() throws Exception { - String sql = "explain select * from test_explain.explain_t1 where dt = '1001';"; + public void testExplainInsertInto() throws Exception { + String sql = "explain insert into test_explain.explain_t1 select * from test_explain.explain_t2"; String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false); System.out.println(explainString); Assert.assertFalse(explainString.contains("CAST")); } - public void testExplainInsertInto() throws Exception { + public void testExplainVerboseInsertInto() throws Exception { String sql = "explain verbose insert into test_explain.explain_t1 select * from test_explain.explain_t2"; String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, true); System.out.println(explainString); Assert.assertTrue(explainString.contains("CAST")); } + public void testExplainSelect() throws Exception { + String sql = "explain select * from test_explain.explain_t1 where dt = '1001';"; + String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, sql, false); + System.out.println(explainString); + Assert.assertFalse(explainString.contains("CAST")); + } + public void testExplainVerboseSelect() throws Exception { 
String queryStr = "explain verbose select * from test_explain.explain_t1 where dt = '1001';"; String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(ctx, queryStr, true); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java index ee09be1d8a..62b741b61b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java @@ -1991,8 +1991,9 @@ public class QueryPlanTest { public void testExplainInsertInto() throws Exception { ExplainTest explainTest = new ExplainTest(); explainTest.before(connectContext); - explainTest.testExplainSelect(); explainTest.testExplainInsertInto(); + explainTest.testExplainVerboseInsertInto(); + explainTest.testExplainSelect(); explainTest.testExplainVerboseSelect(); explainTest.testExplainConcatSelect(); explainTest.testExplainVerboseConcatSelect(); @@ -2088,7 +2089,7 @@ public class QueryPlanTest { "\"storage_medium\" = \"HDD\",\n" + "\"storage_format\" = \"V2\"\n" + ");\n"); - String queryStr = "EXPLAIN INSERT INTO result_exprs\n" + + String queryStr = "EXPLAIN VERBOSE INSERT INTO result_exprs\n" + "SELECT a.aid,\n" + " b.bid\n" + "FROM\n" + @@ -2098,7 +2099,7 @@ public class QueryPlanTest { String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, queryStr); Assert.assertFalse(explainString.contains("OUTPUT EXPRS:3 | 4")); System.out.println(explainString); - Assert.assertTrue(explainString.contains("OUTPUT EXPRS:`a`.`aid` | 4")); + Assert.assertTrue(explainString.contains("OUTPUT EXPRS:CAST(`a`.`aid` AS INT) | 4")); } @Test --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
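For context on the selectivity formula introduced above in BaseStatsDerive.computeCombinedSelectivity, a minimal standalone sketch of the exponential-backoff combination follows. It is not part of the commit: the class name, the 0.1 stand-in for Expr.DEFAULT_SELECTIVITY, and the sample inputs are assumptions for illustration; only the combining formula mirrors the committed code.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class SelectivityBackoffSketch {
    // Assumed stand-in for Expr.DEFAULT_SELECTIVITY; the exact value in Doris may differ.
    static final double DEFAULT_SELECTIVITY = 0.1;

    // Combines per-conjunct selectivities the way BaseStatsDerive does:
    // conjuncts with unknown selectivity are represented by a single default entry,
    // the list is sorted ascending so the most selective conjunct is applied in full,
    // and each additional factor is damped by an exponent of 1/(i+1).
    static double combine(List<Double> knownSelectivities, int totalConjuncts) {
        List<Double> selectivities = new ArrayList<>(knownSelectivities);
        if (selectivities.size() != totalConjuncts) {
            selectivities.add(DEFAULT_SELECTIVITY);
        }
        Collections.sort(selectivities);
        double result = 1.0;
        // result = s1^(1/1) * s2^(1/2) * ... * sn^(1/n)
        for (int i = 0; i < selectivities.size(); ++i) {
            result *= Math.pow(selectivities.get(i), 1.0 / (i + 1));
        }
        // Bound the combined selectivity in [0, 1].
        return Math.max(0.0, Math.min(1.0, result));
    }

    public static void main(String[] args) {
        // Two conjuncts with estimated selectivities 0.2 and 0.5, plus one
        // conjunct with no estimate: prints ~0.0355.
        System.out.println(combine(List.of(0.2, 0.5), 3));
    }
}

With estimated selectivities 0.2 and 0.5 plus one conjunct lacking an estimate, the sorted list becomes [0.1, 0.2, 0.5] and the result is 0.1^(1/1) * 0.2^(1/2) * 0.5^(1/3) ≈ 0.0355: each additional predicate is discounted more heavily, which is the backoff that keeps correlated conjuncts from driving the derived rowCount toward zero.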