This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new c9ddd88 [Rewrite]Rewrite from_unixtime to reduce calling this function (#5444) c9ddd88 is described below commit c9ddd88e14f814e745ec0a3fc8f82450dd29256d Author: xinghuayu007 <1450306...@qq.com> AuthorDate: Thu Mar 4 22:31:28 2021 +0800 [Rewrite]Rewrite from_unixtime to reduce calling this function (#5444) from_unixtime is a CPU-intensive function. Take the SQL: select field from table where from_unixtime(field) > '2021-03-02'. If the table has one million rows of data, from_unixtime will be called one million times, which makes the query very slow. In issue #5443, we rewrite the from_unixtime predicate into a comparison on the raw timestamp to reduce calls to this function. This rewrite can bring a 2x improvement in query performance. --- .../java/org/apache/doris/analysis/Analyzer.java | 2 + .../doris/rewrite/RewriteFromUnixTimeRule.java | 108 +++++++++++++++++++++ .../org/apache/doris/planner/QueryPlanTest.java | 40 ++++++++ 3 files changed, 150 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java index 0956a6e..dabf2f5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java @@ -39,6 +39,7 @@ import org.apache.doris.rewrite.BetweenToCompoundRule; import org.apache.doris.rewrite.ExprRewriteRule; import org.apache.doris.rewrite.ExprRewriter; import org.apache.doris.rewrite.FoldConstantsRule; +import org.apache.doris.rewrite.RewriteFromUnixTimeRule; import org.apache.doris.rewrite.NormalizeBinaryPredicatesRule; import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmap; import org.apache.doris.rewrite.mvrewrite.CountDistinctToBitmapOrHLLRule; @@ -255,6 +256,7 @@ public class Analyzer { // pushdown and Parquet row group pruning based on min/max statistics. 
rules.add(NormalizeBinaryPredicatesRule.INSTANCE); rules.add(FoldConstantsRule.INSTANCE); + rules.add(RewriteFromUnixTimeRule.INSTANCE); exprRewriter_ = new ExprRewriter(rules); // init mv rewriter List<ExprRewriteRule> mvRewriteRules = Lists.newArrayList(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteFromUnixTimeRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteFromUnixTimeRule.java new file mode 100644 index 0000000..a6d6d70 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/RewriteFromUnixTimeRule.java @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
package org.apache.doris.rewrite;

import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.BinaryPredicate;
import org.apache.doris.analysis.BoolLiteral;
import org.apache.doris.analysis.CompoundPredicate;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.FunctionCallExpr;
import org.apache.doris.analysis.FunctionParams;
import org.apache.doris.analysis.LiteralExpr;
import org.apache.doris.analysis.SlotRef;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Rewrites a comparison between {@code from_unixtime(<integer column>[, <format>])} and a
 * literal into a comparison on the column itself.
 *
 * <p>Example:
 * <pre>
 *   select * from table where from_unixtime(query_time) > '2021-03-02 12:12:23'
 * is rewritten into
 *   select * from table where query_time > 1614658343
 * </pre>
 * where {@code query_time} is an integer column and 1614658343 is the timestamp of
 * 2021-03-02 12:12:23. The rewrite improves query performance because from_unixtime is
 * CPU-intensive and would otherwise be evaluated once per row.
 *
 * <p>Any predicate that does not match this shape is returned unchanged.
 */
public class RewriteFromUnixTimeRule implements ExprRewriteRule {
    public static final RewriteFromUnixTimeRule INSTANCE = new RewriteFromUnixTimeRule();

    private static final String FROM_UNIXTIME = "from_unixtime";
    // Format used by from_unixtime when it is called with a single argument.
    private static final String DEFAULT_PATTERN = "yyyy-MM-dd HH:mm:ss";
    // from_unixtime supports the time range [1970-01-01 00:00:00 ~ 9999-12-31 23:59:59];
    // these are the matching bounds on the raw timestamp column.
    private static final String MIN_TIMESTAMP = "0";
    private static final String MAX_TIMESTAMP = "253402271999";

    /**
     * Applies the rewrite to {@code expr} when it has the form
     * {@code from_unixtime(slot[, format]) <op> literal}.
     *
     * @param expr the expression to inspect
     * @param analyzer unused by this rule
     * @return the rewritten predicate, a {@code false} literal when the column is not of an
     *         integer type, or {@code expr} itself when the rule does not apply
     * @throws AnalysisException if a replacement literal cannot be created
     */
    @Override
    public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
        if (!(expr instanceof BinaryPredicate)) {
            return expr;
        }
        BinaryPredicate predicate = (BinaryPredicate) expr;

        Expr left = predicate.getChild(0);
        if (!(left instanceof FunctionCallExpr)) {
            return expr;
        }
        FunctionCallExpr call = (FunctionCallExpr) left;
        if (!call.getFnName().getFunction().equalsIgnoreCase(FROM_UNIXTIME)) {
            return expr;
        }
        FunctionParams params = call.getParams();
        if (params == null) {
            return expr;
        }
        // definition: from_unixtime(int, format)
        int argCount = params.exprs().size();
        if (argCount != 1 && argCount != 2) {
            return expr;
        }
        Expr firstArg = params.exprs().get(0);
        if (!(firstArg instanceof SlotRef)) {
            return expr;
        }
        SlotRef slot = (SlotRef) firstArg;
        // NOTE(review): a non-integer column collapses the whole predicate to FALSE instead of
        // leaving it untouched - confirm this is intended for implicitly castable column types.
        if (!slot.getColumn().getType().isIntegerType()) {
            return new BoolLiteral(false);
        }
        Expr right = predicate.getChild(1);
        if (!(right instanceof LiteralExpr)) {
            return expr;
        }

        String pattern = DEFAULT_PATTERN;
        if (argCount == 2) {
            Expr formatArg = params.exprs().get(1);
            // The format may be a column or another expression; only a literal can be
            // evaluated at rewrite time, so anything else is left alone.
            if (!(formatArg instanceof LiteralExpr)) {
                return expr;
            }
            pattern = ((LiteralExpr) formatArg).getStringValue();
        }

        String literalText = ((LiteralExpr) right).getStringValue();
        Date parsed;
        try {
            // NOTE(review): SimpleDateFormat parses in the JVM default time zone and accepts a
            // literal that is longer than the pattern (only the matching prefix is consumed).
            // For patterns coarser than seconds the rewritten bound is the start of the
            // period - confirm that matches the string comparison from_unixtime performs.
            parsed = new SimpleDateFormat(pattern).parse(literalText);
        } catch (IllegalArgumentException | ParseException e) {
            // Invalid pattern or a literal that does not match it: keep the original predicate.
            return expr;
        }

        BinaryPredicate.Operator op = predicate.getOp();
        BinaryPredicate onColumn = new BinaryPredicate(op, slot,
                LiteralExpr.create(String.valueOf(parsed.getTime() / 1000), Type.BIGINT));

        if (op == BinaryPredicate.Operator.LT || op == BinaryPredicate.Operator.LE) {
            // A lower bound of 0 must be added: otherwise a row holding negative data such as
            // -100 would satisfy the rewritten predicate and be returned.
            BinaryPredicate lowerBound = new BinaryPredicate(BinaryPredicate.Operator.GE, slot,
                    LiteralExpr.create(MIN_TIMESTAMP, Type.BIGINT));
            return new CompoundPredicate(CompoundPredicate.Operator.AND, onColumn, lowerBound);
        }
        if (op == BinaryPredicate.Operator.GT || op == BinaryPredicate.Operator.GE) {
            // An upper bound must be added as well, because from_unixtime does not support
            // values past 9999-12-31 23:59:59.
            BinaryPredicate upperBound = new BinaryPredicate(BinaryPredicate.Operator.LE, slot,
                    LiteralExpr.create(MAX_TIMESTAMP, Type.BIGINT));
            return new CompoundPredicate(CompoundPredicate.Operator.AND, upperBound, onColumn);
        }
        return onColumn;
    }
}
100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java @@ -1420,6 +1420,46 @@ public class QueryPlanTest { explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); Assert.assertTrue(explainString.contains("OUTPUT EXPRS:3 | 4")); } + + @Test + public void testFromUnixTimeRewrite() throws Exception { + connectContext.setDatabase("default_cluster:test"); + //default format + String sql = "select * from test1 where from_unixtime(query_time) > '2021-03-02 10:01:28'"; + String explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614650488")); + //format yyyy-MM-dd HH:mm:ss + sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd HH:mm:ss') > '2021-03-02 10:01:28'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614650488")); + //format yyyy-MM-dd HH:mm + sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd HH:mm') > '2021-03-02 10:01:28'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614650460")); + //format yyyy-MM-dd HH + sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd HH') > '2021-03-02 10:01:28'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614650400")); + //format yyyy-MM-dd + sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd') > '2021-03-02 10:01:28'"; + explainString = 
UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614614400")); + //format yyyy-MM + sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM') > '2021-03-02 10:01:28'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1614528000")); + //format yyyy + sql = "select * from test1 where from_unixtime(query_time, 'yyyy') > '2021-03-02 10:01:28'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + Assert.assertTrue(explainString.contains("PREDICATES: `query_time` <= 253402271999, `query_time` > 1609430400")); + + //format less than + sql = "select * from test1 where from_unixtime(query_time, 'yyyy-MM-dd') < '2021-03-02 10:01:28'"; + explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); + System.out.println("wangxixu-explain:"+explainString); + Assert.assertTrue(explainString.contains("PREDICATES: `query_time` < 1614614400, `query_time` >= 0")); + + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org