This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch dev-1.1.2 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/dev-1.1.2 by this push: new a746b2675e [cherry-pick 1.1.2](planner) Fix wrong planner with count(*) optmizer for cross join optimization (#11621) a746b2675e is described below commit a746b2675ead5cd4b24e2dd2f3893ba1c7946677 Author: Kikyou1997 <33112463+kikyou1...@users.noreply.github.com> AuthorDate: Tue Aug 9 16:23:50 2022 +0800 [cherry-pick 1.1.2](planner) Fix wrong planner with count(*) optmizer for cross join optimization (#11621) --- .../org/apache/doris/analysis/AggregateInfo.java | 6 -- .../apache/doris/analysis/FunctionCallExpr.java | 17 ----- .../org/apache/doris/analysis/InlineViewRef.java | 2 +- .../org/apache/doris/analysis/SlotDescriptor.java | 15 ++++ .../apache/doris/planner/SingleNodePlanner.java | 30 ++++++++ .../java/org/apache/doris/planner/SortNode.java | 5 +- .../data/aggregate/aggregate_count1.out | 4 ++ .../suites/query/aggregate/aggregate_count1.groovy | 82 ++++++++++++++++++++++ 8 files changed, 134 insertions(+), 27 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java index b599f36909..128e60df7a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java @@ -104,11 +104,6 @@ public final class AggregateInfo extends AggregateInfoBase { // if set, a subset of groupingExprs_; set and used during planning private List<Expr> partitionExprs_; - // indices into aggregateExprs for those that need to be materialized; - // shared between this, mergeAggInfo and secondPhaseDistinctAggInfo - private ArrayList<Integer> materializedAggregateSlots_ = Lists.newArrayList(); - // if true, this AggregateInfo is the first phase of a 2-phase DISTINCT computation - private boolean isDistinctAgg = false; // If true, the sql has MultiDistinct private boolean isMultiDistinct_; @@ -309,7 +304,6 @@ public final class AggregateInfo extends AggregateInfoBase { } this.isMultiDistinct_= estimateIfContainsMultiDistinct(distinctAggExprs); - isDistinctAgg = true; // add DISTINCT parameters to grouping exprs if (!isMultiDistinct_) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 80a7d631f7..0021a1cf9b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -330,23 +330,6 @@ public class FunctionCallExpr extends Expr { return fnParams.isDistinct(); } - public boolean isCountStar() { - if (fnName.getFunction().equalsIgnoreCase(FunctionSet.COUNT)) { - if (fnParams.isStar()) { - return true; - } else if (fnParams.exprs() == null || fnParams.exprs().isEmpty()) { - return true; - } else { - for (Expr expr : fnParams.exprs()) { - if (expr.isConstant()) { - return true; - } - } - } - } - return false; - } - public boolean isCountDistinctBitmapOrHLL() { if (!fnParams.isDistinct()) { return false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java index 01791ed17f..ecc5c4360d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java @@ -198,7 +198,6 @@ public class InlineViewRef extends TableRef { desc.setIsMaterialized(true); materializedTupleIds.add(desc.getId()); } - // create sMap and baseTblSmap and register auxiliary eq predicates between our // tuple descriptor's slots and our *unresolved* select list exprs; // we create these auxiliary predicates so that the analyzer can compute the value @@ -214,6 +213,7 @@ public class InlineViewRef extends TableRef { String colName = getColLabels().get(i); SlotDescriptor slotDesc = analyzer.registerColumnRef(getAliasAsName(), colName); Expr colExpr = queryStmt.getResultExprs().get(i); + slotDesc.setSourceExpr(colExpr); SlotRef slotRef = new SlotRef(slotDesc); sMap.put(slotRef, colExpr); baseTblSmap.put(slotRef, queryStmt.getBaseTblResultExprs().get(i)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java index 8081ff57e0..939aa05874 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java @@ -157,6 +157,21 @@ public class SlotDescriptor { isMaterialized = value; } + public void materializeSrcExpr() { + if (sourceExprs_ == null) { + return; + } + for (Expr expr : sourceExprs_) { + if (!(expr instanceof SlotRef)) { + continue; + } + SlotRef slotRef = (SlotRef) expr; + SlotDescriptor slotDesc = slotRef.getDesc(); + slotDesc.setIsMaterialized(true); + slotDesc.materializeSrcExpr(); + } + } + public boolean getIsNullable() { return isNullable; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index 305a44fe4e..21f58d9b8b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -1017,9 +1017,38 @@ public class SingleNodePlanner { for (int i = 1; i < selectStmt.getTableRefs().size(); ++i) { TableRef innerRef = selectStmt.getTableRefs().get(i); + // Optimization of CountStar would change the materialization of some outputSlot, + // it would cause the nullability inconsistent of SlotRef between outputTupleDesc + // and agg/groupBy expr. So if some not materialized slots in outputTuple of aggInfo + // is set to materialized after the optimization of CountStar, we need to call + // aggregateInfo.materializeRequiredSlots again to make sure all required slots is + // materialized. + boolean aggOutputNotHaveMaterializedSlot = false; + AggregateInfo aggregateInfo = null; + TupleDescriptor output = null; + if (innerRef instanceof InlineViewRef) { + InlineViewRef inlineViewRef = (InlineViewRef) innerRef; + QueryStmt queryStmt = inlineViewRef.getViewStmt(); + if (queryStmt instanceof SelectStmt) { + aggregateInfo = ((SelectStmt) queryStmt).getAggInfo(); + if (aggregateInfo != null) { + output = aggregateInfo.getOutputTupleDesc(); + aggOutputNotHaveMaterializedSlot = + output.getSlots().stream().noneMatch(SlotDescriptor::isMaterialized); + } + } + } root = createJoinNode(analyzer, root, innerRef, selectStmt); // Have the build side of a join copy data to a compact representation // in the tuple buffer. + if (aggOutputNotHaveMaterializedSlot + && aggregateInfo + .getOutputTupleDesc() + .getSlots() + .stream() + .anyMatch(SlotDescriptor::isMaterialized)) { + aggregateInfo.materializeRequiredSlots(analyzer, null); + } root.getChildren().get(1).setCompactData(true); root.assignConjuncts(analyzer); } @@ -2260,6 +2289,7 @@ public class SingleNodePlanner { for (SlotId id : slotIds) { final SlotDescriptor slot = analyzer.getDescTbl().getSlotDesc(id); slot.setIsMaterialized(true); + slot.materializeSrcExpr(); } for (TupleId id : tupleIds) { final TupleDescriptor tuple = analyzer.getDescTbl().getTupleDesc(id); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java index c4ac2c7760..f578385d9e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java @@ -47,6 +47,8 @@ import java.util.Set; /** * Sorting. + * Attention, in some corner case, we need enable projection planner to promise the correctness of the Plan. + * Please refer to this regression test:regression-test/suites/query/aggregate/aggregate_count1.groovy. */ public class SortNode extends PlanNode { private final static Logger LOG = LogManager.getLogger(SortNode.class); @@ -240,9 +242,6 @@ public class SortNode extends PlanNode { outputSmap = new ExprSubstitutionMap(); for (int i = 0; i < slotExprs.size(); ++i) { - if (!sortTupleSlots.get(i).isMaterialized()) { - continue; - } resolvedTupleExprs.add(slotExprs.get(i)); outputSmap.put(slotExprs.get(i), new SlotRef(sortTupleSlots.get(i))); } diff --git a/regression-test/data/aggregate/aggregate_count1.out b/regression-test/data/aggregate/aggregate_count1.out new file mode 100644 index 0000000000..84f6fe8329 --- /dev/null +++ b/regression-test/data/aggregate/aggregate_count1.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +8 + diff --git a/regression-test/suites/query/aggregate/aggregate_count1.groovy b/regression-test/suites/query/aggregate/aggregate_count1.groovy new file mode 100644 index 0000000000..eaf4efb62f --- /dev/null +++ b/regression-test/suites/query/aggregate/aggregate_count1.groovy @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("aggregate_count1", "query") { + sql "create table aggregate_count1 (\n" + + "\n" + + " name varchar(128) ,\n" + + "\n" + + " age INT ,\n" + + "\n" + + " identityCode varchar(128) ,\n" + + "\n" + + " cardNo String ,\n" + + "\n" + + " number String ,\n" + + "\n" + + " birthday DATETIME ,\n" + + "\n" + + " country String ,\n" + + "\n" + + " gender String ,\n" + + "\n" + + " covid BOOLEAN \n" + + "\n" + + ")UNIQUE KEY(name,age,`identityCode`)\n" + + "\n" + + "DISTRIBUTED BY HASH(`identityCode`) BUCKETS 10\n" + + "\n" + + "PROPERTIES(\"replication_num\" = \"1\");\n" + + "\n" + sql "insert into aggregate_count1 values ('张三0',11,'1234567','123','321312','1999-02-13','中国','男',false)," + + "('张三1',11,'12345678','123','321312','1999-02-13','中国','男',false)," + + "('张三2',11,'12345671','123','321312','1999-02-13','中国','男',false)," + + "('张三3',11,'12345673','123','321312','1999-02-13','中国','男',false)," + + "('张三4',11,'123456711','123','321312','1999-02-13','中国','男',false)," + + "('张三5',11,'1232134567','123','321312','1999-02-13','中国','男',false)," + + "('张三6',11,'124314567','123','321312','1999-02-13','中国','男',false)," + + "('张三7',11,'123445167','123','321312','1998-02-13','中国','男',false);" + qt_select "SELECT count(1) FROM (WITH t1 AS (\n" + + " WITH t AS (\n" + + " SELECT * FROM aggregate_count1\n" + + " )\n" + + " SELECT\n" + + " identityCode,\n" + + " COUNT(1) as dataAmount,\n" + + " ROUND(COUNT(1) / tableWithSum.sumResult,4) as proportion,\n" + + " MD5(identityCode) as virtuleUniqKey\n" + + " FROM t,(SELECT COUNT(1) as sumResult from t) tableWithSum\n" + + " GROUP BY identityCode ,tableWithSum.sumResult\n" + + ")\n" + + "SELECT\n" + + " identityCode,dataAmount,\n" + + " (\n" + + " CASE\n" + + " WHEN t1.virtuleUniqKey = tableWithMaxId.max_virtuleUniqKey THEN\n" + + " ROUND(proportion + calcTheTail, 4)\n" + + " ELSE\n" + + " proportion\n" + + " END\n" + + " ) proportion\n" + + "FROM t1,\n" + + " (SELECT (1 - sum(t1.proportion)) as calcTheTail FROM t1 ) tableWithTail,\n" + + " (SELECT virtuleUniqKey as max_virtuleUniqKey FROM t1 ORDER BY proportion DESC LIMIT 1 ) tableWithMaxId\n" + + "ORDER BY identityCode) t_a76fe3e829ddb51;" + sql "drop table aggregate_count1" +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org