This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 68bd4a1a96 [opt](Nereids) check multiple distinct functions that cannot be transformed into multi_distinct (#21626) 68bd4a1a96 is described below commit 68bd4a1a96b988fab31ffc3a1aeeda831347e1bd Author: 谢健 <jianx...@gmail.com> AuthorDate: Mon Jul 24 16:34:17 2023 +0800 [opt](Nereids) check multiple distinct functions that cannot be transformed into multi_distinct (#21626) This commit introduces a transformation for SQL queries that contain multiple distinct aggregate functions. When an aggregation has more than one distinct argument (for example, DISTINCT c1 and DISTINCT c2 in the same query), these functions are converted into multi_distinct functions for more efficient handling. Example: ``` SELECT COUNT(DISTINCT c1), SUM(DISTINCT c2) FROM tbl GROUP BY c3 -- Transformed to SELECT MULTI_DISTINCT_COUNT(c1), MULTI_DISTINCT_SUM(c2) FROM tbl GROUP BY c3 ``` The following functions can be transformed: - COUNT - SUM - AVG - GROUP_CONCAT If any unsupported functions are encountered, an error is now reported during the optimization phase. To ensure the absence of such cases, a final check has been implemented after the rewriting phase. 
--- .../doris/nereids/jobs/executor/Rewriter.java | 2 + .../nereids/rules/rewrite/CheckMultiDistinct.java | 62 ++++++++++++++++++++++ .../suites/nereids_syntax_p0/analyze_agg.groovy | 5 ++ 3 files changed, 69 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 3fb3208e5e..8e2f0260bb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -40,6 +40,7 @@ import org.apache.doris.nereids.rules.rewrite.CTEInline; import org.apache.doris.nereids.rules.rewrite.CheckAndStandardizeWindowFunctionAndFrame; import org.apache.doris.nereids.rules.rewrite.CheckDataTypes; import org.apache.doris.nereids.rules.rewrite.CheckMatchExpression; +import org.apache.doris.nereids.rules.rewrite.CheckMultiDistinct; import org.apache.doris.nereids.rules.rewrite.CollectFilterAboveConsumer; import org.apache.doris.nereids.rules.rewrite.CollectProjectAboveConsumer; import org.apache.doris.nereids.rules.rewrite.ColumnPruning; @@ -291,6 +292,7 @@ public class Rewriter extends AbstractBatchJobExecutor { bottomUp( new ExpressionRewrite(CheckLegalityAfterRewrite.INSTANCE), new CheckMatchExpression(), + new CheckMultiDistinct(), new CheckAfterRewrite() ) ), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMultiDistinct.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMultiDistinct.java new file mode 100644 index 0000000000..4488a94b8d --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMultiDistinct.java @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; +import org.apache.doris.nereids.trees.expressions.functions.agg.Avg; +import org.apache.doris.nereids.trees.expressions.functions.agg.Count; +import org.apache.doris.nereids.trees.expressions.functions.agg.GroupConcat; +import org.apache.doris.nereids.trees.expressions.functions.agg.Sum; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; + +import com.google.common.collect.ImmutableSet; + +/** + * If there are multiple distinct aggregate functions that cannot + * be transformed into multi_distinct, an error is reported. + * The following functions can be transformed into multi_distinct: + * - count -> MULTI_DISTINCT_COUNT + * - sum -> MULTI_DISTINCT_SUM + * - avg -> MULTI_DISTINCT_AVG + * - group_concat -> MULTI_DISTINCT_GROUP_CONCAT + */ +public class CheckMultiDistinct extends OneRewriteRuleFactory { + private final ImmutableSet<Class<? 
extends AggregateFunction>> supportedFunctions = + ImmutableSet.of(Count.class, Sum.class, Avg.class, GroupConcat.class); + + @Override + public Rule build() { + return logicalAggregate().then(agg -> checkDistinct(agg)).toRule(RuleType.CHECK_ANALYSIS); + } + + private LogicalAggregate checkDistinct(LogicalAggregate<? extends Plan> aggregate) { + if (aggregate.getDistinctArguments().size() > 1) { + + for (AggregateFunction func : aggregate.getAggregateFunctions()) { + if (func.isDistinct() && !supportedFunctions.contains(func.getClass())) { + throw new AnalysisException(func.toString() + " can't support multi distinct."); + } + } + } + return aggregate; + } +} diff --git a/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy b/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy index e5184234f3..2f0b0d01c6 100644 --- a/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy +++ b/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy @@ -68,4 +68,9 @@ suite("analyze_agg") { tt2.d, tt2.c; """ + + test { + sql "select count(distinct t2.id), max(distinct t2.c) from t2" + exception "max(DISTINCT c#2) can't support multi distinct." + } } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org