This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new b0555522a1c [Fix](nereids) modify the binding aggregate function in order by (#34606) b0555522a1c is described below commit b0555522a1c3b9b62057a695e292e075f7da21ef Author: feiniaofeiafei <53502832+feiniaofeia...@users.noreply.github.com> AuthorDate: Thu May 16 14:10:50 2024 +0800 [Fix](nereids) modify the binding aggregate function in order by (#34606) Do same job with pr #32758 and pr #33843. Because the master branch2.0 differ in binding order by expressions. So this pr is not a cherry-pick, but actually does same job. For each order by expression: If it does not have aggregate functions, firstly bind expression from aggregate outputs. If not found, then bind expression from aggregate child outputs. If it has aggregate functions, firstly bind expression from aggregate outputs which do not have aggregate functions. If not found, then bind expression from aggregate child outputs. Co-authored-by: feiniaofeiafei <moail...@selectdb.com> --- .../nereids/rules/analysis/BindExpression.java | 45 ++++++++++++- .../nereids_syntax_p0/order_by_bind_priority.out | 47 ++++++++++++++ .../order_by_bind_priority.groovy | 74 ++++++++++++++++++++++ 3 files changed, 164 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java index 72af439f452..275fe2281b0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java @@ -81,6 +81,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.commons.lang3.StringUtils; @@ -430,14 +431,14 @@ public class BindExpression implements AnalysisRuleFactory { logicalSort(aggregate()).thenApply(ctx -> { LogicalSort<Aggregate<Plan>> sort = ctx.root; Aggregate<Plan> aggregate = sort.child(); - return bindSort(sort, aggregate, ctx.cascadesContext); + return bindSortAggregate(sort, aggregate, ctx.cascadesContext, ctx.connectContext); }) ), RuleType.BINDING_SORT_SLOT.build( logicalSort(logicalHaving(aggregate())).thenApply(ctx -> { LogicalSort<LogicalHaving<Aggregate<Plan>>> sort = ctx.root; Aggregate<Plan> aggregate = sort.child().child(); - return bindSort(sort, aggregate, ctx.cascadesContext); + return bindSortAggregate(sort, aggregate, ctx.cascadesContext, ctx.connectContext); }) ), RuleType.BINDING_SORT_SLOT.build( @@ -698,6 +699,46 @@ public class BindExpression implements AnalysisRuleFactory { ).stream().map(ruleCondition).collect(ImmutableList.toImmutableList()); } + private Plan bindSortAggregate(LogicalSort<? extends Plan> sort, Aggregate<Plan> aggregate, CascadesContext ctx, + ConnectContext connectContext) { + List<Slot> aggOutput = aggregate.getOutput(); + Plan aggChild = aggregate.child(); + List<Slot> aggChildOutput = aggChild.getOutput(); + List<Slot> aggregateOutputWithoutAggFunc = Lists.newArrayList(); + List<NamedExpression> outputExpressions = aggregate.getOutputExpressions(); + for (NamedExpression outputExpr : outputExpressions) { + if (!outputExpr.anyMatch(expr -> expr instanceof AggregateFunction)) { + aggregateOutputWithoutAggFunc.add(outputExpr.toSlot()); + } + } + + // find in agg outputs + Scope aggOutputScope = toScope(ctx, aggOutput); + SlotBinder bindByAggOutput = new SlotBinder(aggOutputScope, ctx, true, false, true); + // find in agg child outputs + Scope aggChildOutputScope = toScope(ctx, aggChildOutput); + SlotBinder bindByAggChildOutput = new SlotBinder(aggChildOutputScope, ctx, true, false, true); + // Find in the outputs of aggregate without aggregate function output + Scope aggregateOutputWithoutAggFuncScope = toScope(ctx, aggregateOutputWithoutAggFunc); + SlotBinder bindByAggregateOutputWithoutAggFunc = new SlotBinder(aggregateOutputWithoutAggFuncScope, + ctx, true, false, true); + + FunctionRegistry functionRegistry = connectContext.getEnv().getFunctionRegistry(); + Builder<OrderKey> boundOrderKeys = ImmutableList.builderWithExpectedSize(sort.getOrderKeys().size()); + for (OrderKey orderKey : sort.getOrderKeys()) { + Expression boundKey; + if (hasAggregateFunction(orderKey.getExpr(), functionRegistry)) { + boundKey = bindByAggregateOutputWithoutAggFunc.bind(orderKey.getExpr()); + } else { + boundKey = bindByAggOutput.bind(orderKey.getExpr()); + } + boundKey = bindByAggChildOutput.bind(boundKey); + boundKey = bindFunction(boundKey, sort, ctx); + boundOrderKeys.add(orderKey.withExpression(boundKey)); + } + return new LogicalSort<>(boundOrderKeys.build(), sort.child()); + } + private Plan bindSort(LogicalSort<? extends Plan> sort, Plan plan, CascadesContext ctx) { // 1. We should deduplicate the slots, otherwise the binding process will fail due to the // ambiguous slots exist. diff --git a/regression-test/data/nereids_syntax_p0/order_by_bind_priority.out b/regression-test/data/nereids_syntax_p0/order_by_bind_priority.out new file mode 100644 index 00000000000..4fd63bc7f99 --- /dev/null +++ b/regression-test/data/nereids_syntax_p0/order_by_bind_priority.out @@ -0,0 +1,47 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !test_bind_order_by_with_aggfun1 -- +10 -5 -10 +4 -2 -4 +4 1 3 +6 3 6 + +-- !test_bind_order_by_with_aggfun2 -- +4 -2 -4 +10 -5 0 +4 1 4 +6 3 6 + +-- !test_bind_order_by_with_aggfun3 -- +5 -5 5 +2 -2 -2 +2 1 4 +3 3 3 + +-- !test_bind_order_by_in_no_agg_func_output -- +1 4 +2 -2 +3 3 +5 5 + +-- !test_multi_slots_in_agg_func_bind_first -- +\N -3 2 \N \N \N +\N 0 5 \N 16155 16155 +\N 1 6 \N \N \N +\N 2 7 \N \N \N +\N 6 11 \N \N \N +\N 8 13 \N \N \N +\N 13 18 \N \N \N +2 -5 0 8777 \N \N +2 -4 1 127 30240 30240 +2 -2 3 10008 -54 -54 +2 3 8 29433 -4654 -4654 +2 4 9 13909 29600 29600 +2 12 17 22950 99 99 +4 -1 4 3618 2881 2881 +4 5 10 10450 8105 8105 +4 7 12 88 88 88 +4 9 14 74 14138 14138 +4 10 15 23 63 63 +4 11 16 4418 -24598 -24598 +4 14 19 2 \N \N + diff --git a/regression-test/suites/nereids_syntax_p0/order_by_bind_priority.groovy b/regression-test/suites/nereids_syntax_p0/order_by_bind_priority.groovy new file mode 100644 index 00000000000..44c8b5fa734 --- /dev/null +++ b/regression-test/suites/nereids_syntax_p0/order_by_bind_priority.groovy @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("order_by_bind_priority") { + sql "SET enable_nereids_planner=true;" + sql "SET enable_fallback_to_original_planner=false;" + + sql "drop table if exists t_order_by_bind_priority" + sql """create table t_order_by_bind_priority (c1 int, c2 int) distributed by hash(c1) properties("replication_num"="1");""" + sql "insert into t_order_by_bind_priority values(-2, -2),(1,2),(1,2),(3,3),(-5,5);" + sql "sync" + + + qt_test_bind_order_by_with_aggfun1 "select 2*abs(sum(c1)) as c1, c1,sum(c1)+c1 from t_order_by_bind_priority group by c1 order by sum(c1)+c1 asc;" + qt_test_bind_order_by_with_aggfun2 "select 2*abs(sum(c1)) as c2, c1,sum(c1)+c2 from t_order_by_bind_priority group by c1,c2 order by sum(c1)+c2 asc;" + qt_test_bind_order_by_with_aggfun3 "select abs(sum(c1)) as c1, c1,sum(c2) as c2 from t_order_by_bind_priority group by c1 order by sum(c1) asc;" + qt_test_bind_order_by_in_no_agg_func_output "select abs(c1) xx, sum(c2) from t_order_by_bind_priority group by xx order by min(xx)" + test { + sql "select abs(sum(c1)) as c1, c1,sum(c2) as c2 from t_order_by_bind_priority group by c1 order by sum(c1)+c2 asc;" + exception "c2 should be grouped by." + } + sql """drop table if exists table_20_undef_partitions2_keys3_properties4_distributed_by58""" + sql """ + create table table_20_undef_partitions2_keys3_properties4_distributed_by58 ( + pk int, + col_int_undef_signed int , + col_int_undef_signed2 int + ) engine=olap + DUPLICATE KEY(pk, col_int_undef_signed) + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + sql """ + insert into table_20_undef_partitions2_keys3_properties4_distributed_by58(pk,col_int_undef_signed,col_int_undef_signed2) values (0,-8777,null), + (1,-127,30240),(2,null,null),(3,-10008,-54),(4,3618,2881),(5,null,16155),(6,null,null),(7,null,null),(8,-29433,-4654),(9,-13909,29600),(10,10450,8105), + (11,null,null),(12,88,88),(13,null,null),(14,74,14138),(15,23,63),(16,4418,-24598),(17,-22950,99),(18,null,null),(19,2,null); + """ + sql "sync;" + + qt_test_multi_slots_in_agg_func_bind_first """ + SELECT + SIGN( SUM(col_int_undef_signed) ) + 3 AS col_int_undef_signed, + pk - 5 pk , + pk pk , + ABS( MIN(col_int_undef_signed) ) AS col_int_undef_signed, + MAX(col_int_undef_signed2) col_int_undef_signed2, + col_int_undef_signed2 col_int_undef_signed2 + FROM + table_20_undef_partitions2_keys3_properties4_distributed_by58 tbl_alias1 + GROUP BY + pk, + col_int_undef_signed2 + ORDER BY + col_int_undef_signed, + pk - 5, + pk, + col_int_undef_signed2 ; + """ + +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org