This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5c990fb737 [fix](nereids) Analyze failed for SQL that has count 
distinct with same col (#17928)
5c990fb737 is described below

commit 5c990fb737f665be63f7a8d733aed97dc9e0bf7e
Author: AKIRA <33112463+kikyou1...@users.noreply.github.com>
AuthorDate: Sun Mar 19 22:31:47 2023 +0900

    [fix](nereids) Analyze failed for SQL that has count distinct with same col 
(#17928)
    
    This problem is caused by the slots with same hashcodes was put in the 
hashset results into the wrong rules was selected.Use list instead of set as 
return type of getDistinctArguments method
---
 .../nereids/rules/implementation/AggregateStrategies.java      |  8 ++++----
 .../org/apache/doris/nereids/stats/StatsErrorEstimator.java    | 10 +++++++++-
 .../apache/doris/nereids/trees/plans/algebra/Aggregate.java    |  6 +++---
 .../data/nereids_syntax_p0/aggregate_strategies.out            |  7 +++++++
 .../suites/nereids_syntax_p0/aggregate_strategies.groovy       |  2 ++
 5 files changed, 25 insertions(+), 8 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index f56bec8d21..ff3fe6d421 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -71,6 +71,7 @@ import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -395,8 +396,7 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
     private List<PhysicalHashAggregate<Plan>> 
twoPhaseAggregateWithCountDistinctMulti(
             LogicalAggregate<? extends Plan> logicalAgg, CascadesContext 
cascadesContext) {
         AggregateParam inputToBufferParam = new AggregateParam(AggPhase.LOCAL, 
AggMode.INPUT_TO_BUFFER);
-
-        Set<Expression> countDistinctArguments = 
logicalAgg.getDistinctArguments();
+        Collection<Expression> countDistinctArguments = 
logicalAgg.getDistinctArguments();
 
         List<Expression> localAggGroupBy = 
ImmutableList.copyOf(ImmutableSet.<Expression>builder()
                 .addAll(logicalAgg.getGroupByExpressions())
@@ -513,7 +513,7 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
             LogicalAggregate<? extends Plan> logicalAgg, CascadesContext 
cascadesContext) {
         AggregateParam inputToBufferParam = new AggregateParam(AggPhase.LOCAL, 
AggMode.INPUT_TO_BUFFER);
 
-        Set<Expression> countDistinctArguments = 
logicalAgg.getDistinctArguments();
+        Collection<Expression> countDistinctArguments = 
logicalAgg.getDistinctArguments();
 
         List<Expression> localAggGroupBy = 
ImmutableList.copyOf(ImmutableSet.<Expression>builder()
                 .addAll(logicalAgg.getGroupByExpressions())
@@ -1134,7 +1134,7 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                 RequireProperties.of(PhysicalProperties.GATHER), anyLocalAgg);
 
         if (logicalAgg.getGroupByExpressions().isEmpty()) {
-            Set<Expression> distinctArguments = 
logicalAgg.getDistinctArguments();
+            Collection<Expression> distinctArguments = 
logicalAgg.getDistinctArguments();
             RequireProperties requireDistinctHash = 
RequireProperties.of(PhysicalProperties.createHash(
                     distinctArguments, ShuffleType.AGGREGATE));
             PhysicalHashAggregate<? extends Plan> hashLocalGatherGlobalAgg = 
anyLocalGatherGlobalAgg
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsErrorEstimator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsErrorEstimator.java
index 27da0af30f..6966fc97ea 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsErrorEstimator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsErrorEstimator.java
@@ -23,6 +23,7 @@ import org.apache.doris.common.util.ProfileManager;
 import org.apache.doris.nereids.trees.plans.AbstractPlan;
 import org.apache.doris.persist.gson.GsonUtils;
 import org.apache.doris.planner.PlanNode;
+import org.apache.doris.statistics.Statistics;
 import org.apache.doris.thrift.TReportExecStatusParams;
 import org.apache.doris.thrift.TRuntimeProfileNode;
 import org.apache.doris.thrift.TUniqueId;
@@ -50,8 +51,15 @@ public class StatsErrorEstimator {
         legacyPlanIdStats = new HashMap<>();
     }
 
+    /**
+     * Map plan id to stats.
+     */
     public void updateLegacyPlanIdToPhysicalPlan(PlanNode planNode, 
AbstractPlan physicalPlan) {
-        legacyPlanIdStats.put(planNode.getId().asInt(), 
Pair.of(physicalPlan.getStats().getRowCount(),
+        Statistics statistics = physicalPlan.getStats();
+        if (statistics == null) {
+            return;
+        }
+        legacyPlanIdStats.put(planNode.getId().asInt(), 
Pair.of(statistics.getRowCount(),
                 (double) 0));
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
index f5e3e14772..5ac7f37dfc 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
@@ -24,7 +24,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.UnaryPlan;
 import org.apache.doris.nereids.util.ExpressionUtils;
 
-import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableList;
 
 import java.util.List;
 import java.util.Set;
@@ -47,10 +47,10 @@ public interface Aggregate<CHILD_TYPE extends Plan> extends 
UnaryPlan<CHILD_TYPE
         return ExpressionUtils.collect(getOutputExpressions(), 
AggregateFunction.class::isInstance);
     }
 
-    default Set<Expression> getDistinctArguments() {
+    default List<Expression> getDistinctArguments() {
         return getAggregateFunctions().stream()
                 .filter(AggregateFunction::isDistinct)
                 .flatMap(aggregateExpression -> 
aggregateExpression.getArguments().stream())
-                .collect(ImmutableSet.toImmutableSet());
+                .collect(ImmutableList.toImmutableList());
     }
 }
diff --git a/regression-test/data/nereids_syntax_p0/aggregate_strategies.out 
b/regression-test/data/nereids_syntax_p0/aggregate_strategies.out
index 4e0c274447..d53cea4e65 100644
--- a/regression-test/data/nereids_syntax_p0/aggregate_strategies.out
+++ b/regression-test/data/nereids_syntax_p0/aggregate_strategies.out
@@ -241,3 +241,10 @@ name_4     1       4
 -- !group_by_count_distinct --
 5
 
+-- !sql_distinct_same_col --
+1
+1
+1
+1
+1
+
diff --git 
a/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy 
b/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy
index 0c2aa544d1..ec1d5355fd 100644
--- a/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy
+++ b/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy
@@ -211,4 +211,6 @@ suite("aggregate_strategies") {
                 from numbers('number' = '10000', 'backend_num'='1')"""
         result([[10000L]])
     }
+
+    qt_sql_distinct_same_col """SELECT COUNT(DISTINCT id, id) FROM 
test_bucket10_table GROUP BY id """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to