This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new b0555522a1c [Fix](nereids) modify the binding aggregate function in 
order by (#34606)
b0555522a1c is described below

commit b0555522a1c3b9b62057a695e292e075f7da21ef
Author: feiniaofeiafei <53502832+feiniaofeia...@users.noreply.github.com>
AuthorDate: Thu May 16 14:10:50 2024 +0800

    [Fix](nereids) modify the binding aggregate function in order by (#34606)
    
    Do same job with pr #32758 and pr #33843.
    Because the master branch2.0 differ in binding order by expressions. So 
this pr is not a cherry-pick, but actually does same job.
    For each order by expression:
    If it does not have aggregate functions, firstly bind expression from 
aggregate outputs. If not found, then bind expression from aggregate child 
outputs.
    If it has aggregate functions, firstly bind expression from aggregate 
outputs which do not have aggregate functions. If not found, then bind 
expression from aggregate child outputs.
    
    
    Co-authored-by: feiniaofeiafei <moail...@selectdb.com>
---
 .../nereids/rules/analysis/BindExpression.java     | 45 ++++++++++++-
 .../nereids_syntax_p0/order_by_bind_priority.out   | 47 ++++++++++++++
 .../order_by_bind_priority.groovy                  | 74 ++++++++++++++++++++++
 3 files changed, 164 insertions(+), 2 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
index 72af439f452..275fe2281b0 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
@@ -81,6 +81,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableList.Builder;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import org.apache.commons.lang3.StringUtils;
 
@@ -430,14 +431,14 @@ public class BindExpression implements 
AnalysisRuleFactory {
                 logicalSort(aggregate()).thenApply(ctx -> {
                     LogicalSort<Aggregate<Plan>> sort = ctx.root;
                     Aggregate<Plan> aggregate = sort.child();
-                    return bindSort(sort, aggregate, ctx.cascadesContext);
+                    return bindSortAggregate(sort, aggregate, 
ctx.cascadesContext, ctx.connectContext);
                 })
             ),
             RuleType.BINDING_SORT_SLOT.build(
                 logicalSort(logicalHaving(aggregate())).thenApply(ctx -> {
                     LogicalSort<LogicalHaving<Aggregate<Plan>>> sort = 
ctx.root;
                     Aggregate<Plan> aggregate = sort.child().child();
-                    return bindSort(sort, aggregate, ctx.cascadesContext);
+                    return bindSortAggregate(sort, aggregate, 
ctx.cascadesContext, ctx.connectContext);
                 })
             ),
             RuleType.BINDING_SORT_SLOT.build(
@@ -698,6 +699,46 @@ public class BindExpression implements AnalysisRuleFactory 
{
         ).stream().map(ruleCondition).collect(ImmutableList.toImmutableList());
     }
 
+    private Plan bindSortAggregate(LogicalSort<? extends Plan> sort, 
Aggregate<Plan> aggregate, CascadesContext ctx,
+            ConnectContext connectContext) {
+        List<Slot> aggOutput = aggregate.getOutput();
+        Plan aggChild = aggregate.child();
+        List<Slot> aggChildOutput = aggChild.getOutput();
+        List<Slot> aggregateOutputWithoutAggFunc = Lists.newArrayList();
+        List<NamedExpression> outputExpressions = 
aggregate.getOutputExpressions();
+        for (NamedExpression outputExpr : outputExpressions) {
+            if (!outputExpr.anyMatch(expr -> expr instanceof 
AggregateFunction)) {
+                aggregateOutputWithoutAggFunc.add(outputExpr.toSlot());
+            }
+        }
+
+        // find in agg outputs
+        Scope aggOutputScope = toScope(ctx, aggOutput);
+        SlotBinder bindByAggOutput = new SlotBinder(aggOutputScope, ctx, true, 
false, true);
+        // find in agg child outputs
+        Scope aggChildOutputScope = toScope(ctx, aggChildOutput);
+        SlotBinder bindByAggChildOutput = new SlotBinder(aggChildOutputScope, 
ctx, true, false, true);
+        // Find in the outputs of aggregate without aggregate function output
+        Scope aggregateOutputWithoutAggFuncScope = toScope(ctx, 
aggregateOutputWithoutAggFunc);
+        SlotBinder bindByAggregateOutputWithoutAggFunc = new 
SlotBinder(aggregateOutputWithoutAggFuncScope,
+                ctx, true, false, true);
+
+        FunctionRegistry functionRegistry = 
connectContext.getEnv().getFunctionRegistry();
+        Builder<OrderKey> boundOrderKeys = 
ImmutableList.builderWithExpectedSize(sort.getOrderKeys().size());
+        for (OrderKey orderKey : sort.getOrderKeys()) {
+            Expression boundKey;
+            if (hasAggregateFunction(orderKey.getExpr(), functionRegistry)) {
+                boundKey = 
bindByAggregateOutputWithoutAggFunc.bind(orderKey.getExpr());
+            } else {
+                boundKey = bindByAggOutput.bind(orderKey.getExpr());
+            }
+            boundKey = bindByAggChildOutput.bind(boundKey);
+            boundKey = bindFunction(boundKey, sort, ctx);
+            boundOrderKeys.add(orderKey.withExpression(boundKey));
+        }
+        return new LogicalSort<>(boundOrderKeys.build(), sort.child());
+    }
+
     private Plan bindSort(LogicalSort<? extends Plan> sort, Plan plan, 
CascadesContext ctx) {
         // 1. We should deduplicate the slots, otherwise the binding process 
will fail due to the
         //    ambiguous slots exist.
diff --git a/regression-test/data/nereids_syntax_p0/order_by_bind_priority.out 
b/regression-test/data/nereids_syntax_p0/order_by_bind_priority.out
new file mode 100644
index 00000000000..4fd63bc7f99
--- /dev/null
+++ b/regression-test/data/nereids_syntax_p0/order_by_bind_priority.out
@@ -0,0 +1,47 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !test_bind_order_by_with_aggfun1 --
+10     -5      -10
+4      -2      -4
+4      1       3
+6      3       6
+
+-- !test_bind_order_by_with_aggfun2 --
+4      -2      -4
+10     -5      0
+4      1       4
+6      3       6
+
+-- !test_bind_order_by_with_aggfun3 --
+5      -5      5
+2      -2      -2
+2      1       4
+3      3       3
+
+-- !test_bind_order_by_in_no_agg_func_output --
+1      4
+2      -2
+3      3
+5      5
+
+-- !test_multi_slots_in_agg_func_bind_first --
+\N     -3      2       \N      \N      \N
+\N     0       5       \N      16155   16155
+\N     1       6       \N      \N      \N
+\N     2       7       \N      \N      \N
+\N     6       11      \N      \N      \N
+\N     8       13      \N      \N      \N
+\N     13      18      \N      \N      \N
+2      -5      0       8777    \N      \N
+2      -4      1       127     30240   30240
+2      -2      3       10008   -54     -54
+2      3       8       29433   -4654   -4654
+2      4       9       13909   29600   29600
+2      12      17      22950   99      99
+4      -1      4       3618    2881    2881
+4      5       10      10450   8105    8105
+4      7       12      88      88      88
+4      9       14      74      14138   14138
+4      10      15      23      63      63
+4      11      16      4418    -24598  -24598
+4      14      19      2       \N      \N
+
diff --git 
a/regression-test/suites/nereids_syntax_p0/order_by_bind_priority.groovy 
b/regression-test/suites/nereids_syntax_p0/order_by_bind_priority.groovy
new file mode 100644
index 00000000000..44c8b5fa734
--- /dev/null
+++ b/regression-test/suites/nereids_syntax_p0/order_by_bind_priority.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("order_by_bind_priority") {
+    sql "SET enable_nereids_planner=true;"
+    sql "SET enable_fallback_to_original_planner=false;"
+
+    sql "drop table if exists t_order_by_bind_priority"
+    sql """create table t_order_by_bind_priority (c1 int, c2 int) distributed 
by hash(c1) properties("replication_num"="1");"""
+    sql "insert into t_order_by_bind_priority values(-2, 
-2),(1,2),(1,2),(3,3),(-5,5);"
+    sql "sync"
+
+
+    qt_test_bind_order_by_with_aggfun1 "select 2*abs(sum(c1)) as c1, 
c1,sum(c1)+c1 from t_order_by_bind_priority group by c1 order by sum(c1)+c1 
asc;"
+    qt_test_bind_order_by_with_aggfun2 "select 2*abs(sum(c1)) as c2, 
c1,sum(c1)+c2 from t_order_by_bind_priority group by c1,c2 order by sum(c1)+c2 
asc;"
+    qt_test_bind_order_by_with_aggfun3 "select abs(sum(c1)) as c1, c1,sum(c2) 
as c2 from t_order_by_bind_priority group by c1 order by sum(c1) asc;"
+    qt_test_bind_order_by_in_no_agg_func_output "select abs(c1) xx, sum(c2) 
from t_order_by_bind_priority group by xx order by min(xx)"
+    test {
+        sql "select abs(sum(c1)) as c1, c1,sum(c2) as c2 from 
t_order_by_bind_priority group by c1 order by sum(c1)+c2 asc;"
+        exception "c2 should be grouped by."
+    }
+    sql """drop table if exists 
table_20_undef_partitions2_keys3_properties4_distributed_by58"""
+    sql """
+        create table 
table_20_undef_partitions2_keys3_properties4_distributed_by58 (
+        pk int,
+        col_int_undef_signed int   ,
+        col_int_undef_signed2 int
+        ) engine=olap
+        DUPLICATE KEY(pk, col_int_undef_signed)
+        distributed by hash(pk) buckets 10
+        properties("replication_num" = "1");
+    """
+    sql """
+        insert into 
table_20_undef_partitions2_keys3_properties4_distributed_by58(pk,col_int_undef_signed,col_int_undef_signed2)
 values (0,-8777,null),
+        
(1,-127,30240),(2,null,null),(3,-10008,-54),(4,3618,2881),(5,null,16155),(6,null,null),(7,null,null),(8,-29433,-4654),(9,-13909,29600),(10,10450,8105),
+        
(11,null,null),(12,88,88),(13,null,null),(14,74,14138),(15,23,63),(16,4418,-24598),(17,-22950,99),(18,null,null),(19,2,null);
+    """
+    sql "sync;"
+
+    qt_test_multi_slots_in_agg_func_bind_first """
+        SELECT
+        SIGN( SUM(col_int_undef_signed) ) + 3 AS col_int_undef_signed,
+        pk - 5 pk ,
+        pk pk ,
+        ABS( MIN(col_int_undef_signed) ) AS col_int_undef_signed,
+        MAX(col_int_undef_signed2) col_int_undef_signed2,
+        col_int_undef_signed2 col_int_undef_signed2
+        FROM
+        table_20_undef_partitions2_keys3_properties4_distributed_by58 
tbl_alias1
+        GROUP BY
+        pk,
+        col_int_undef_signed2
+        ORDER BY
+        col_int_undef_signed,
+        pk - 5,
+        pk,
+        col_int_undef_signed2 ;
+    """
+
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to