[doris] branch dev-1.1.2 updated: [cherry-pick 1.1.2](planner) Fix wrong planner with count(*) optmizer for cross join optimization (#11621)

yiguolei Tue, 09 Aug 2022 01:24:33 -0700

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch dev-1.1.2
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/dev-1.1.2 by this push:
     new a746b2675e [cherry-pick 1.1.2](planner) Fix wrong planner with 
count(*) optmizer for cross join optimization (#11621)
a746b2675e is described below

commit a746b2675ead5cd4b24e2dd2f3893ba1c7946677
Author: Kikyou1997 <33112463+kikyou1...@users.noreply.github.com>
AuthorDate: Tue Aug 9 16:23:50 2022 +0800

    [cherry-pick 1.1.2](planner) Fix wrong planner with count(*) optmizer for 
cross join optimization (#11621)
---
 .../org/apache/doris/analysis/AggregateInfo.java   |  6 --
 .../apache/doris/analysis/FunctionCallExpr.java    | 17 -----
 .../org/apache/doris/analysis/InlineViewRef.java   |  2 +-
 .../org/apache/doris/analysis/SlotDescriptor.java  | 15 ++++
 .../apache/doris/planner/SingleNodePlanner.java    | 30 ++++++++
 .../java/org/apache/doris/planner/SortNode.java    |  5 +-
 .../data/aggregate/aggregate_count1.out            |  4 ++
 .../suites/query/aggregate/aggregate_count1.groovy | 82 ++++++++++++++++++++++
 8 files changed, 134 insertions(+), 27 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
index b599f36909..128e60df7a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
@@ -104,11 +104,6 @@ public final class AggregateInfo extends AggregateInfoBase 
{
     // if set, a subset of groupingExprs_; set and used during planning
     private List<Expr> partitionExprs_;
 
-    // indices into aggregateExprs for those that need to be materialized;
-    // shared between this, mergeAggInfo and secondPhaseDistinctAggInfo
-    private ArrayList<Integer> materializedAggregateSlots_ = 
Lists.newArrayList();
-    // if true, this AggregateInfo is the first phase of a 2-phase DISTINCT 
computation
-    private boolean isDistinctAgg = false;
     // If true, the sql has MultiDistinct
     private boolean isMultiDistinct_;
 
@@ -309,7 +304,6 @@ public final class AggregateInfo extends AggregateInfoBase {
         }
 
         this.isMultiDistinct_= 
estimateIfContainsMultiDistinct(distinctAggExprs);
-        isDistinctAgg = true;
 
         // add DISTINCT parameters to grouping exprs
         if (!isMultiDistinct_) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index 80a7d631f7..0021a1cf9b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -330,23 +330,6 @@ public class FunctionCallExpr extends Expr {
         return fnParams.isDistinct();
     }
 
-    public boolean isCountStar() {
-        if (fnName.getFunction().equalsIgnoreCase(FunctionSet.COUNT)) {
-            if (fnParams.isStar()) {
-                return true;
-            } else if (fnParams.exprs() == null || fnParams.exprs().isEmpty()) 
{
-                return true;
-            } else {
-                for (Expr expr : fnParams.exprs()) {
-                    if (expr.isConstant()) {
-                        return true;
-                    }
-                }
-            }
-        }
-        return false;
-    }
-
     public boolean isCountDistinctBitmapOrHLL() {
         if (!fnParams.isDistinct()) {
             return false;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java
index 01791ed17f..ecc5c4360d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InlineViewRef.java
@@ -198,7 +198,6 @@ public class InlineViewRef extends TableRef {
             desc.setIsMaterialized(true);
             materializedTupleIds.add(desc.getId());
         }
-
         // create sMap and baseTblSmap and register auxiliary eq predicates 
between our
         // tuple descriptor's slots and our *unresolved* select list exprs;
         // we create these auxiliary predicates so that the analyzer can 
compute the value
@@ -214,6 +213,7 @@ public class InlineViewRef extends TableRef {
             String colName = getColLabels().get(i);
             SlotDescriptor slotDesc = 
analyzer.registerColumnRef(getAliasAsName(), colName);
             Expr colExpr = queryStmt.getResultExprs().get(i);
+            slotDesc.setSourceExpr(colExpr);
             SlotRef slotRef = new SlotRef(slotDesc);
             sMap.put(slotRef, colExpr);
             baseTblSmap.put(slotRef, queryStmt.getBaseTblResultExprs().get(i));
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
index 8081ff57e0..939aa05874 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotDescriptor.java
@@ -157,6 +157,21 @@ public class SlotDescriptor {
         isMaterialized = value;
     }
 
+    public void materializeSrcExpr() {
+        if (sourceExprs_ == null) {
+            return;
+        }
+        for (Expr expr : sourceExprs_) {
+            if (!(expr instanceof SlotRef)) {
+                continue;
+            }
+            SlotRef slotRef = (SlotRef) expr;
+            SlotDescriptor slotDesc = slotRef.getDesc();
+            slotDesc.setIsMaterialized(true);
+            slotDesc.materializeSrcExpr();
+        }
+    }
+
     public boolean getIsNullable() {
         return isNullable;
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index 305a44fe4e..21f58d9b8b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -1017,9 +1017,38 @@ public class SingleNodePlanner {
 
             for (int i = 1; i < selectStmt.getTableRefs().size(); ++i) {
                 TableRef innerRef = selectStmt.getTableRefs().get(i);
+                // Optimization of CountStar would change the materialization 
of some outputSlot,
+                // it would cause the nullability inconsistent of SlotRef 
between outputTupleDesc
+                // and agg/groupBy expr. So if some not materialized slots in 
outputTuple of aggInfo
+                // is set to materialized after the optimization of CountStar, 
we need to call
+                // aggregateInfo.materializeRequiredSlots again to make sure 
all required slots is
+                // materialized.
+                boolean aggOutputNotHaveMaterializedSlot = false;
+                AggregateInfo aggregateInfo = null;
+                TupleDescriptor output = null;
+                if (innerRef instanceof InlineViewRef) {
+                    InlineViewRef inlineViewRef = (InlineViewRef) innerRef;
+                    QueryStmt queryStmt = inlineViewRef.getViewStmt();
+                    if (queryStmt instanceof SelectStmt) {
+                        aggregateInfo = ((SelectStmt) queryStmt).getAggInfo();
+                        if (aggregateInfo != null) {
+                            output = aggregateInfo.getOutputTupleDesc();
+                            aggOutputNotHaveMaterializedSlot =
+                                    
output.getSlots().stream().noneMatch(SlotDescriptor::isMaterialized);
+                        }
+                    }
+                }
                 root = createJoinNode(analyzer, root, innerRef, selectStmt);
                 // Have the build side of a join copy data to a compact 
representation
                 // in the tuple buffer.
+                if (aggOutputNotHaveMaterializedSlot
+                        && aggregateInfo
+                        .getOutputTupleDesc()
+                        .getSlots()
+                        .stream()
+                        .anyMatch(SlotDescriptor::isMaterialized)) {
+                    aggregateInfo.materializeRequiredSlots(analyzer, null);
+                }
                 root.getChildren().get(1).setCompactData(true);
                 root.assignConjuncts(analyzer);
             }
@@ -2260,6 +2289,7 @@ public class SingleNodePlanner {
         for (SlotId id : slotIds) {
             final SlotDescriptor slot = analyzer.getDescTbl().getSlotDesc(id);
             slot.setIsMaterialized(true);
+            slot.materializeSrcExpr();
         }
         for (TupleId id : tupleIds) {
             final TupleDescriptor tuple = 
analyzer.getDescTbl().getTupleDesc(id);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
index c4ac2c7760..f578385d9e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
@@ -47,6 +47,8 @@ import java.util.Set;
 
 /**
  * Sorting.
+ * Attention, in some corner case, we need enable projection planner to 
promise the correctness of the Plan.
+ * Please refer to this regression 
test:regression-test/suites/query/aggregate/aggregate_count1.groovy.
  */
 public class SortNode extends PlanNode {
     private final static Logger LOG = LogManager.getLogger(SortNode.class);
@@ -240,9 +242,6 @@ public class SortNode extends PlanNode {
         outputSmap = new ExprSubstitutionMap();
 
         for (int i = 0; i < slotExprs.size(); ++i) {
-            if (!sortTupleSlots.get(i).isMaterialized()) {
-                continue;
-            }
             resolvedTupleExprs.add(slotExprs.get(i));
             outputSmap.put(slotExprs.get(i), new 
SlotRef(sortTupleSlots.get(i)));
         }
diff --git a/regression-test/data/aggregate/aggregate_count1.out 
b/regression-test/data/aggregate/aggregate_count1.out
new file mode 100644
index 0000000000..84f6fe8329
--- /dev/null
+++ b/regression-test/data/aggregate/aggregate_count1.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+8
+
diff --git a/regression-test/suites/query/aggregate/aggregate_count1.groovy 
b/regression-test/suites/query/aggregate/aggregate_count1.groovy
new file mode 100644
index 0000000000..eaf4efb62f
--- /dev/null
+++ b/regression-test/suites/query/aggregate/aggregate_count1.groovy
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+suite("aggregate_count1", "query") {
+    sql "create table aggregate_count1 (\n" +
+            "\n" +
+            "    name varchar(128) ,\n" +
+            "\n" +
+            "    age INT ,\n" +
+            "\n" +
+            "    identityCode  varchar(128) ,\n" +
+            "\n" +
+            "    cardNo String ,\n" +
+            "\n" +
+            "    number String ,\n" +
+            "\n" +
+            "    birthday DATETIME ,\n" +
+            "\n" +
+            "    country String ,\n" +
+            "\n" +
+            "    gender String ,\n" +
+            "\n" +
+            "    covid BOOLEAN \n" +
+            "\n" +
+            ")UNIQUE KEY(name,age,`identityCode`)\n" +
+            "\n" +
+            "DISTRIBUTED BY HASH(`identityCode`) BUCKETS 10\n" +
+            "\n" +
+            "PROPERTIES(\"replication_num\" = \"1\");\n" +
+            "\n"
+    sql "insert into aggregate_count1 values 
('张三0',11,'1234567','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三1',11,'12345678','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三2',11,'12345671','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三3',11,'12345673','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三4',11,'123456711','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三5',11,'1232134567','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三6',11,'124314567','123','321312','1999-02-13','中国','男',false)," +
+            
"('张三7',11,'123445167','123','321312','1998-02-13','中国','男',false);"
+    qt_select "SELECT count(1) FROM (WITH t1 AS (\n" +
+            "     WITH t AS (\n" +
+            "                SELECT * FROM aggregate_count1\n" +
+            "        )\n" +
+            "        SELECT\n" +
+            "                identityCode,\n" +
+            "                COUNT(1) as dataAmount,\n" +
+            "                ROUND(COUNT(1) / tableWithSum.sumResult,4) as 
proportion,\n" +
+            "               MD5(identityCode) as virtuleUniqKey\n" +
+            "        FROM t,(SELECT COUNT(1) as sumResult from t) 
tableWithSum\n" +
+            "        GROUP BY identityCode ,tableWithSum.sumResult\n" +
+            ")\n" +
+            "SELECT\n" +
+            "        identityCode,dataAmount,\n" +
+            "        (\n" +
+            "                CASE\n" +
+            "                WHEN t1.virtuleUniqKey = 
tableWithMaxId.max_virtuleUniqKey THEN\n" +
+            "                        ROUND(proportion + calcTheTail, 4)\n" +
+            "                ELSE\n" +
+            "                        proportion\n" +
+            "              END\n" +
+            "        ) proportion\n" +
+            "FROM t1,\n" +
+            "        (SELECT (1 - sum(t1.proportion)) as calcTheTail FROM t1 ) 
tableWithTail,\n" +
+            "        (SELECT virtuleUniqKey as  max_virtuleUniqKey FROM t1 
ORDER BY proportion DESC LIMIT 1 ) tableWithMaxId\n" +
+            "ORDER BY identityCode) t_a76fe3e829ddb51;"
+    sql "drop table aggregate_count1"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[doris] branch dev-1.1.2 updated: [cherry-pick 1.1.2](planner) Fix wrong planner with count(*) optmizer for cross join optimization (#11621)

Reply via email to