morrySnow commented on code in PR #48987:
URL: https://github.com/apache/doris/pull/48987#discussion_r1994669249


##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java:
##########
@@ -43,17 +54,110 @@ public class ProjectToGlobalAggregate extends 
OneAnalysisRuleFactory {
     @Override
     public Rule build() {
         return RuleType.PROJECT_TO_GLOBAL_AGGREGATE.build(
-           logicalProject().then(project -> {
-               boolean needGlobalAggregate = project.getProjects()
-                       .stream()
-                       .anyMatch(p -> 
p.accept(ExpressionVisitors.CONTAINS_AGGREGATE_CHECKER, null));
-
-               if (needGlobalAggregate) {
-                   return new LogicalAggregate<>(ImmutableList.of(), 
project.getProjects(), project.child());
-               } else {
-                   return project;
-               }
-           })
+            logicalProject().then(project -> {
+                project = distinctConstantsToLimit1(project);
+                Plan result = projectToAggregate(project);
+                return distinctToAggregate(result, project);
+            })
         );
     }
+
+    // select distinct 1,2,3 from tbl
+    //               ↓
+    // select 1,2,3 from (select 1, 2, 3 from tbl limit 1) as tmp
+    private static LogicalProject<Plan> 
distinctConstantsToLimit1(LogicalProject<Plan> project) {
+        if (!project.isDistinct()) {
+            return project;
+        }
+
+        boolean allSelectItemAreConstants = true;
+        for (NamedExpression selectItem : project.getProjects()) {
+            if (!selectItem.isConstant()) {
+                allSelectItemAreConstants = false;

Review Comment:
   return project here directly?



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java:
##########
@@ -53,21 +53,27 @@ public List<Rule> buildRules() {
                 ))
                 .add(RuleType.REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT.build(
                         logicalSort(logicalAggregate()).then(sort -> {
-                            LogicalAggregate<Plan> aggregate = sort.child();
-                            Map<Expression, Slot> sMap = 
buildOutputAliasMap(aggregate.getOutputExpressions());
+                            LogicalAggregate<Plan> agg = sort.child();
+                            Map<Expression, Slot> sMap = 
buildOutputAliasMap(agg.getOutputExpressions());
+                            if (sMap.isEmpty() && isSelectDistinct(agg)) {
+                                sMap = getSelectDistinctExpressions(agg);

Review Comment:
   add some comment to explain why need to process distinct seperately



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java:
##########
@@ -43,17 +54,110 @@ public class ProjectToGlobalAggregate extends 
OneAnalysisRuleFactory {
     @Override
     public Rule build() {
         return RuleType.PROJECT_TO_GLOBAL_AGGREGATE.build(
-           logicalProject().then(project -> {
-               boolean needGlobalAggregate = project.getProjects()
-                       .stream()
-                       .anyMatch(p -> 
p.accept(ExpressionVisitors.CONTAINS_AGGREGATE_CHECKER, null));
-
-               if (needGlobalAggregate) {
-                   return new LogicalAggregate<>(ImmutableList.of(), 
project.getProjects(), project.child());
-               } else {
-                   return project;
-               }
-           })
+            logicalProject().then(project -> {
+                project = distinctConstantsToLimit1(project);
+                Plan result = projectToAggregate(project);
+                return distinctToAggregate(result, project);
+            })
         );
     }
+
+    // select distinct 1,2,3 from tbl
+    //               ↓
+    // select 1,2,3 from (select 1, 2, 3 from tbl limit 1) as tmp
+    private static LogicalProject<Plan> 
distinctConstantsToLimit1(LogicalProject<Plan> project) {
+        if (!project.isDistinct()) {
+            return project;
+        }
+
+        boolean allSelectItemAreConstants = true;
+        for (NamedExpression selectItem : project.getProjects()) {
+            if (!selectItem.isConstant()) {
+                allSelectItemAreConstants = false;
+                break;
+            }
+        }
+
+        if (allSelectItemAreConstants) {
+            return new LogicalProject<>(
+                    project.getProjects(),
+                    new LogicalLimit<>(1, 0, LimitPhase.ORIGIN, 
project.child())
+            );
+        }
+        return project;
+    }
+
+    // select avg(xxx) from tbl
+    //         ↓
+    // LogicalAggregate(groupBy=[], output=[avg(xxx)])
+    private static Plan projectToAggregate(LogicalProject<Plan> project) {
+        // contains aggregate functions, like sum, avg ?
+        for (NamedExpression selectItem : project.getProjects()) {
+            if 
(selectItem.accept(ExpressionVisitors.CONTAINS_AGGREGATE_CHECKER, null)) {
+                return new LogicalAggregate<>(ImmutableList.of(), 
project.getProjects(), project.child());
+            }
+        }
+        return project;
+    }
+
+    private static Plan distinctToAggregate(Plan result, LogicalProject<Plan> 
originProject) {
+        if (!originProject.isDistinct()) {
+            return result;
+        }
+        if (result instanceof LogicalProject) {
+            // remove distinct: select distinct fun(xxx) as c1 from tbl
+            //
+            // LogicalProject(distinct=true, output=[fun(xxx) as c1])
+            //                  ↓
+            // LogicalAggregate(groupBy=[c1], output=[c1])
+            //                  |
+            //   LogicalProject(output=[fun(xxx) as c1])
+            LogicalProject<?> project = (LogicalProject<?>) result;
+
+            ImmutableList.Builder<NamedExpression> bottomProjectOutput
+                    = 
ImmutableList.builderWithExpectedSize(project.getProjects().size());
+            ImmutableList.Builder<NamedExpression> topAggOutput
+                    = 
ImmutableList.builderWithExpectedSize(project.getProjects().size());
+
+            boolean hasComplexExpr = false;
+            for (NamedExpression selectItem : project.getProjects()) {
+                if (selectItem.isSlot()) {
+                    topAggOutput.add(selectItem);
+                    bottomProjectOutput.add(selectItem);
+                } else if (isAliasLiteral(selectItem)) {
+                    // stay in agg, and eliminate by 
`ELIMINATE_GROUP_BY_CONSTANT`
+                    topAggOutput.add(selectItem);
+                } else {
+                    // `FillUpMissingSlots` not support find complex expr in 
aggregate,
+                    // so we should push down into the bottom project
+                    hasComplexExpr = true;
+                    topAggOutput.add(selectItem.toSlot());
+                    bottomProjectOutput.add(selectItem);
+                }
+            }
+
+            if (!hasComplexExpr) {
+                List<Slot> projects = (List) project.getProjects();
+                return new LogicalAggregate(projects, projects, 
project.child());
+            }
+
+            LogicalProject<?> removeDistinct = new 
LogicalProject<>(bottomProjectOutput.build(), project.child());
+            ImmutableList<NamedExpression> aggOutput = topAggOutput.build();
+            return new LogicalAggregate(aggOutput, aggOutput, removeDistinct);
+        } else if (result instanceof LogicalAggregate) {
+            // remove distinct: select distinct avg(xxx) as c1 from tbl
+            //
+            // LogicalProject(distinct=true, output=[avg(xxx) as c1])
+            //                  ↓
+            //  LogicalAggregate(output=[avg(xxx) as c1])
+            return result;
+        } else {
+            // never reach
+            throw new AnalysisException("Unsupported");

Review Comment:
   add a more clear error message



##########
regression-test/suites/nereids_p0/aggregate/select_distinct.groovy:
##########
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+suite("select_distinct") {
+    multi_sql """
+        SET enable_nereids_planner=true;
+        SET enable_fallback_to_original_planner=false;
+        drop table if exists test_distinct_window;
+        create table test_distinct_window(id int) distributed by hash(id) 
properties('replication_num'='1');
+        insert into test_distinct_window values(1), (2), (3);
+        """
+
+    test {
+        sql "select distinct sum(value) over(partition by id) from (select 100 
value, 1 id union all select 100, 2)a"
+        result([[100L]])
+    }
+

Review Comment:
   add more case to cover all if branch in rules, such as
   - qualify with alias 
   - qualify with new window function
   - qualify with window function which has exsited in project list
   - having



##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java:
##########
@@ -43,17 +54,110 @@ public class ProjectToGlobalAggregate extends 
OneAnalysisRuleFactory {
     @Override
     public Rule build() {
         return RuleType.PROJECT_TO_GLOBAL_AGGREGATE.build(
-           logicalProject().then(project -> {
-               boolean needGlobalAggregate = project.getProjects()
-                       .stream()
-                       .anyMatch(p -> 
p.accept(ExpressionVisitors.CONTAINS_AGGREGATE_CHECKER, null));
-
-               if (needGlobalAggregate) {
-                   return new LogicalAggregate<>(ImmutableList.of(), 
project.getProjects(), project.child());
-               } else {
-                   return project;
-               }
-           })
+            logicalProject().then(project -> {
+                project = distinctConstantsToLimit1(project);
+                Plan result = projectToAggregate(project);
+                return distinctToAggregate(result, project);
+            })
         );
     }
+
+    // select distinct 1,2,3 from tbl
+    //               ↓
+    // select 1,2,3 from (select 1, 2, 3 from tbl limit 1) as tmp
+    private static LogicalProject<Plan> 
distinctConstantsToLimit1(LogicalProject<Plan> project) {
+        if (!project.isDistinct()) {
+            return project;
+        }
+
+        boolean allSelectItemAreConstants = true;
+        for (NamedExpression selectItem : project.getProjects()) {
+            if (!selectItem.isConstant()) {
+                allSelectItemAreConstants = false;
+                break;
+            }
+        }
+
+        if (allSelectItemAreConstants) {
+            return new LogicalProject<>(
+                    project.getProjects(),
+                    new LogicalLimit<>(1, 0, LimitPhase.ORIGIN, 
project.child())
+            );
+        }
+        return project;
+    }
+
+    // select avg(xxx) from tbl
+    //         ↓
+    // LogicalAggregate(groupBy=[], output=[avg(xxx)])
+    private static Plan projectToAggregate(LogicalProject<Plan> project) {
+        // contains aggregate functions, like sum, avg ?

Review Comment:
   ```suggestion
           // contains aggregate functions, like sum, avg
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to