This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new a2b9b9edd7 [fix](planner) fix bug in agg on constant column (#16442) a2b9b9edd7 is described below commit a2b9b9edd72d70fed4cd76c571ee5b736493e3a7 Author: minghong <engle...@gmail.com> AuthorDate: Mon Feb 13 11:26:08 2023 +0800 [fix](planner) fix bug in agg on constant column (#16442) For performance reason, we want to remove constant column from groupingExprs. For example: `select sum(T.A) from T group by T.B, 'xyz'` is equivalent to `select sum(T.A) from T group by T.B` We can remove constant column `abc` from groupingExprs. But there is an exception when all groupingExpr are constant For example: sql1: `select 'abc' from t group by 'abc'` is not equivalent to sql2: `select 'abc' from t` sql3: `select 'abc', sum(a) from t group by 'abc'` is not equivalent to sql4: `select 1, sum(a) from t` (when t is empty, sql3 returns 0 tuple, sql4 return 1 tuple) We need to keep some constant columns if all groupingExpr are constant. Consider sql5 `select a from (select "abc" as a, 'def' as b) T group by b, a;` if the constant column `a` is in select list, this column should not be removed. sql5 is transformed to sql6 `select a from (select "abc" as a, 'def' as b) T group by a;` --- .../java/org/apache/doris/analysis/SelectStmt.java | 64 ++++++++++++++++++++-- ...antGroupBy_order.out => constant_group_key.out} | 11 +++- .../sql/group_by/runConstantGroupBy_order.out | 6 ++ .../suites/trino_p0/constant_group_key.groovy | 53 ++++++++++++++++++ .../sql/group_by/runConstantGroupBy_order.sql | 5 +- 5 files changed, 131 insertions(+), 8 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java index d0819885db..4b84dd0a86 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -1301,11 +1301,65 @@ public class SelectStmt extends QueryStmt { substituteOrdinalsAliases(groupingExprs, "GROUP BY", analyzer, aliasFirst); if (!groupByClause.isGroupByExtension() && !groupingExprs.isEmpty()) { - ArrayList<Expr> tempExprs = new ArrayList<>(groupingExprs); - groupingExprs.removeIf(Expr::isConstant); - if (groupingExprs.isEmpty() && aggExprs.isEmpty()) { - // should keep at least one expr to make the result correct - groupingExprs.add(tempExprs.get(0)); + /* + For performance reason, we want to remove constant column from groupingExprs. + For example: + `select sum(T.A) from T group by T.B, 'xyz'` is equivalent to `select sum(T.A) from T group by T.B` + We can remove constant column `abc` from groupingExprs. + + But there is an exception when all groupingExpr are constant + For example: + sql1: `select 'abc' from t group by 'abc'` + is not equivalent to + sql2: `select 'abc' from t` + + sql3: `select 'abc', sum(a) from t group by 'abc'` + is not equivalent to + sql4: `select 1, sum(a) from t` + (when t is empty, sql3 returns 0 tuple, sql4 return 1 tuple) + + We need to keep some constant columns if all groupingExpr are constant. + Consider sql5 `select a from (select "abc" as a, 'def' as b) T group by b, a;` + if the constant column is in select list, this column should not be removed. + */ + + Expr theFirstConstantGroupingExpr = null; + boolean someGroupExprRemoved = false; + ArrayList<Expr> tempExprs = new ArrayList<>(); + for (Expr groupExpr : groupingExprs) { + //remove groupExpr if it is const, and it is not in select list + boolean removeConstGroupingKey = false; + if (groupExpr.isConstant()) { + if (theFirstConstantGroupingExpr == null) { + theFirstConstantGroupingExpr = groupExpr; + } + boolean keyInSelectList = false; + if (groupExpr instanceof SlotRef) { + for (SelectListItem item : selectList.getItems()) { + if (item.getExpr() instanceof SlotRef) { + keyInSelectList = ((SlotRef) item.getExpr()).columnEqual(groupExpr); + if (keyInSelectList) { + break; + } + } + } + } + removeConstGroupingKey = ! keyInSelectList; + } + if (removeConstGroupingKey) { + someGroupExprRemoved = true; + } else { + tempExprs.add(groupExpr); + } + } + if (someGroupExprRemoved) { + groupingExprs.clear(); + groupingExprs.addAll(tempExprs); + //groupingExprs need at least one expr, it can be + //any original grouping expr. we use the first one. + if (groupingExprs.isEmpty()) { + groupingExprs.add(theFirstConstantGroupingExpr); + } } } diff --git a/regression-test/data/trino_p0/sql/group_by/runConstantGroupBy_order.out b/regression-test/data/trino_p0/constant_group_key.out similarity index 55% copy from regression-test/data/trino_p0/sql/group_by/runConstantGroupBy_order.out copy to regression-test/data/trino_p0/constant_group_key.out index 6a880dbc0a..6cb7d0559c 100644 --- a/regression-test/data/trino_p0/sql/group_by/runConstantGroupBy_order.out +++ b/regression-test/data/trino_p0/constant_group_key.out @@ -1,4 +1,11 @@ -- This file is automatically generated. You should know what you did if you want to edit this --- !runConstantGroupBy_order -- -2 +-- !scalar_count -- +0 + +-- !agg_count -- + +-- !scalar_sum -- +\N + +-- !agg_sum -- diff --git a/regression-test/data/trino_p0/sql/group_by/runConstantGroupBy_order.out b/regression-test/data/trino_p0/sql/group_by/runConstantGroupBy_order.out index 6a880dbc0a..7c092acd1e 100644 --- a/regression-test/data/trino_p0/sql/group_by/runConstantGroupBy_order.out +++ b/regression-test/data/trino_p0/sql/group_by/runConstantGroupBy_order.out @@ -2,3 +2,9 @@ -- !runConstantGroupBy_order -- 2 +-- !runConstantGroupBy_order_2 -- +oneline + +-- !runConstantGroupBy_order_3 -- +abc + diff --git a/regression-test/suites/trino_p0/constant_group_key.groovy b/regression-test/suites/trino_p0/constant_group_key.groovy new file mode 100644 index 0000000000..cd159722a3 --- /dev/null +++ b/regression-test/suites/trino_p0/constant_group_key.groovy @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This suit test remove constant group_by_key +suite("constant_group_key") { + //remove constant key + explain { + sql("select 'oneline' from nation group by n_nationkey, 'constant1'") + contains "group by: `n_nationkey`" + } + + //reserve constant key in group by + explain { + sql("select 'oneline' from nation group by 'constant1'") + contains "group by: 'constant1'" + } + + explain { + sql("select 'oneline', sum(n_nationkey) from nation group by 'constant1', 'constant2'") + contains "group by: 'constant1'" + } + + explain { + sql("select a from (select '1' as b, 'abc' as a) T group by b, a") + contains "group by: 'abc'" + } + + sql "drop table if exists cgk_tbl" + sql """ + create table cgk_tbl (a int null) + engine=olap duplicate key(a) + distributed by hash(a) buckets 1 + properties ('replication_num'='1');""" + + qt_scalar_count 'select count(*) from cgk_tbl'; + qt_agg_count "select count(*) from cgk_tbl group by 'any const str'" + qt_scalar_sum 'select sum(a) from cgk_tbl'; + qt_agg_sum "select sum(a) from cgk_tbl group by 'any const str'" +} diff --git a/regression-test/suites/trino_p0/sql/group_by/runConstantGroupBy_order.sql b/regression-test/suites/trino_p0/sql/group_by/runConstantGroupBy_order.sql index d90f03a34d..0ed5664054 100644 --- a/regression-test/suites/trino_p0/sql/group_by/runConstantGroupBy_order.sql +++ b/regression-test/suites/trino_p0/sql/group_by/runConstantGroupBy_order.sql @@ -1 +1,4 @@ -select 2 from nation group by 1 +select 2 from nation group by 1; +select "oneline" from nation group by "I am a const, but not to be removed"; +select a from (select "abc" as a, '1' as b) T group by b, a; + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org