This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new f7e129934e [fix](nereids) only order by slot reference could use topn opt (#18622) f7e129934e is described below commit f7e129934e892de77b61c9579addd9a68b838594 Author: starocean999 <40539150+starocean...@users.noreply.github.com> AuthorDate: Fri Apr 14 20:59:06 2023 +0800 [fix](nereids) only order by slot reference could use topn opt (#18622) select cast(k1 as INT) as id from tbl1 order by id limit 2; is not valid for topN optimization, because 'id' is a cast expr not a table column from scan node. This pr address this issue. --- .../java/org/apache/doris/analysis/CastExpr.java | 2 + .../doris/nereids/processor/post/TopNScanOpt.java | 9 ++- regression-test/data/nereids_p0/sort/sort.out | 40 ++++++++++++++ regression-test/suites/nereids_p0/sort/sort.groovy | 64 ++++++++++++++++++++++ 4 files changed, 114 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index b2779c33cf..2ea293f246 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -130,10 +130,12 @@ public class CastExpr extends Expr { if (type.isDecimalV2() && e.type.isDecimalV2()) { getChild(0).setType(type); } + analysisDone(); return; } if (e.type.isNull()) { + analysisDone(); return; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java index 721ba3773f..7a2d93f7c1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/TopNScanOpt.java @@ -51,7 +51,14 @@ public class TopNScanOpt extends PlanPostProcessor { return topN; } Expression firstKey = topN.getOrderKeys().get(0).getExpr(); - if (!(firstKey instanceof SlotReference)) { + + // if firstKey's column is not present, it means the firstKey is not a original column from scan node + // for example: "select cast(k1 as INT) as id from tbl1 order by id limit 2;" the firstKey "id" is + // a cast expr which is not from tbl1 and its column is not present. + // On the other hand "select k1 as id from tbl1 order by id limit 2;" the firstKey "id" is just an alias of k1 + // so its column is present which is valid for topN optimize + // see Alias::toSlot() method to get how column info is passed around by alias of slotReference + if (!(firstKey instanceof SlotReference) || !((SlotReference) firstKey).getColumn().isPresent()) { return topN; } if (firstKey.getDataType().isStringLikeType() diff --git a/regression-test/data/nereids_p0/sort/sort.out b/regression-test/data/nereids_p0/sort/sort.out index 4e898a3f62..ffddb115bc 100644 --- a/regression-test/data/nereids_p0/sort/sort.out +++ b/regression-test/data/nereids_p0/sort/sort.out @@ -10,3 +10,43 @@ -- !sort_string_on_fe -- true +-- !sql -- + +-- !sql -- +1 +2 +3 +4 + +-- !sql -- +0 +0 +1 +1 + +-- !sql -- +1 +2 +3 +4 + +-- !sql -- +1 +2 +3 +4 + +-- !sql -- +1 +2 + +-- !sql -- +1 +2 + +-- !sql_orderby_non_overlap_desc -- +2023-03-21T09:00 area1 p0 aaaaa ddddd6 100.000 100.000 100.000 100.000 2023-03-21T17:00 +2023-03-21T08:00 area1 p0 aaaaa ddddd5 100.000 100.000 100.000 100.000 2023-03-21T17:00 +2023-03-21T07:00 area1 p0 aaaaa ddddd2 100.000 100.000 100.000 100.000 2023-03-21T17:00 +2023-03-21T06:00 area1 p0 aaaaa ddddd1 100.000 100.000 100.000 100.000 2023-03-21T17:00 + diff --git a/regression-test/suites/nereids_p0/sort/sort.groovy b/regression-test/suites/nereids_p0/sort/sort.groovy index c7869b7ceb..11f4bc5dfb 100644 --- a/regression-test/suites/nereids_p0/sort/sort.groovy +++ b/regression-test/suites/nereids_p0/sort/sort.groovy @@ -25,4 +25,68 @@ suite("sort") { qt_sort_string_single_column """ select * from ( select '汇总' as a union all select '2022-01-01' as a ) a order by 1 """ qt_sort_string_multiple_columns """ select * from ( select '汇总' as a,1 as b union all select '2022-01-01' as a,1 as b ) a order by 1,2 """ qt_sort_string_on_fe """ select '汇总' > '2022-01-01' """ + + sql """CREATE TABLE IF NOT EXISTS Test2PhaseSortWhenAggTable + (`l1` VARCHAR(20) NOT NULL, `l2` VARCHAR(20) NOT NULL, `id` INT REPLACE NOT NULL, `maximum` INT MAX DEFAULT "0" ) + ENGINE=olap AGGREGATE KEY(`l1`, `l2`) PARTITION BY LIST(`l1`, `l2`) ( PARTITION `p1` VALUES IN (("a", "a"), ("b", "b"), ("c", "c")), + PARTITION `p2` VALUES IN (("d", "d"), ("e", "e"), ("f", "f")), PARTITION `p3` VALUES IN (("g", "g"), ("h", "h"), ("i", "i")) ) DISTRIBUTED BY HASH(`l1`) BUCKETS 2 PROPERTIES ( "replication_num" = "1" )""" + + sql """insert into Test2PhaseSortWhenAggTable values ("a", "a", 1, 1), ("b", "b", 3, 2), ("c", "c", 3, 3), ("d", "d", 4, 4), ("e", "e", 5, 5), ("f", "f", 6, 6), ("g", "g", 7, 7), ("h", "h", 8, 8), ("i", "i", 9, 9)""" + + qt_sql """ + SELECT /*+ SET_VAR(query_timeout = 600) */ ref_0.`l1` AS c0, + bitmap_empty() AS c1, + ref_0.`l1` AS c2 + FROM Test2PhaseSortWhenAggTable AS ref_0 + WHERE ref_0.`l2` IS NOT NULL + ORDER BY ref_0.`l1` DESC + LIMIT 110 + OFFSET 130 + """ + + sql """drop table if exists tbl1""" + sql """create table tbl1 (k1 varchar(100), k2 string) distributed by hash(k1) buckets 1 properties("replication_num" = "1");""" + sql """insert into tbl1 values(1, "alice");""" + sql """insert into tbl1 values(2, "bob");""" + sql """insert into tbl1 values(3, "mark");""" + sql """insert into tbl1 values(4, "thor");""" + qt_sql """select cast(k1 as INT) as id from tbl1 order by id;""" + qt_sql """select cast(k1 as INT) % 2 as id from tbl1 order by id;""" + qt_sql """select cast(k1 as BIGINT) as id from tbl1 order by id;""" + qt_sql """select cast(k1 as STRING) as id from tbl1 order by id;""" + qt_sql """select cast(k1 as INT) as id from tbl1 order by id limit 2""" + qt_sql """select cast(k1 as STRING) as id from tbl1 order by id limit 2""" + + sql """ DROP TABLE if exists `sort_non_overlap`; """ + sql """ CREATE TABLE `sort_non_overlap` ( + `time_period` datetime NOT NULL, + `area_name` varchar(255) NOT NULL, + `province` varchar(255) NOT NULL, + `res_name` varchar(255) NOT NULL, + `dev` varchar(255) NOT NULL, + `dec0` decimal(10, 3) REPLACE_IF_NOT_NULL NULL, + `dec1` decimal(10, 3) REPLACE_IF_NOT_NULL NULL, + `dec2` decimal(10, 3) REPLACE_IF_NOT_NULL NULL, + `dec3` decimal(10, 3) REPLACE_IF_NOT_NULL NULL, + `update_time` datetime REPLACE NULL + ) ENGINE=OLAP + AGGREGATE KEY(`time_period`, `area_name`, `province`, `res_name`, `dev`) + DISTRIBUTED BY HASH(`area_name`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + sql """ insert into sort_non_overlap values + ('2023-03-21 06:00:00', 'area1', 'p0', 'aaaaa', 'ddddd1', 100, 100, 100, 100, '2023-03-21 17:00:00'), + ('2023-03-21 07:00:00', 'area1', 'p0', 'aaaaa', 'ddddd2', 100, 100, 100, 100, '2023-03-21 17:00:00'); + """ + + sql """ insert into sort_non_overlap values + ('2023-03-21 08:00:00', 'area1', 'p0', 'aaaaa', 'ddddd5', 100, 100, 100, 100, '2023-03-21 17:00:00'), + ('2023-03-21 09:00:00', 'area1', 'p0', 'aaaaa', 'ddddd6', 100, 100, 100, 100, '2023-03-21 17:00:00'); + """ + + qt_sql_orderby_non_overlap_desc """ select * from sort_non_overlap order by time_period desc limit 4; """ } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org