This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 4b0bee68d53 [fix](scan) Incorrect scan keys lead to wrong query results. (#40814) 4b0bee68d53 is described below commit 4b0bee68d53111c090a9a04a4a633d9f7fc2e96b Author: Jerry Hu <mrh...@gmail.com> AuthorDate: Thu Sep 19 13:57:14 2024 +0800 [fix](scan) Incorrect scan keys lead to wrong query results. (#40814) ## Proposed changes ``` mysql [doris_14555]>select * from table_9436528_3; +------+------+------+------+------------------------+--------------------+------+ | col1 | col2 | col3 | col5 | col4 | col6 | col7 | +------+------+------+------+------------------------+--------------------+------+ | -100 | 1 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.904282 | NULL | | -100 | 1 | 92 | 1 | 2024-02-16 04:37:37.00 | 23423423.0324234 | NULL | | -100 | 0 | -82 | 0 | 2023-11-11 10:49:43.00 | 840968969.872149 | NULL | ``` wrong result: ``` mysql [doris_14555]>select * from table_9436528_3 where col1 <= -100 and col2 in (true, false) and col3 = -82; +------+------+------+------+------------------------+--------------------+------+ | col1 | col2 | col3 | col5 | col4 | col6 | col7 | +------+------+------+------+------------------------+--------------------+------+ | -100 | 1 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.904282 | NULL | | -100 | 1 | 92 | 1 | 2024-02-16 04:37:37.00 | 23423423.0324234 | NULL | +------+------+------+------+------------------------+--------------------+------+ ``` --- be/src/exec/olap_common.h | 6 +- be/src/pipeline/exec/olap_scan_operator.cpp | 18 ++-- .../correctness/test_scan_keys_with_bool_type.out | 43 ++++++++ .../test_scan_keys_with_bool_type.groovy | 110 +++++++++++++++++++++ 4 files changed, 169 insertions(+), 8 deletions(-) diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index 9ba1b81d1b8..a4180938dfc 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -375,7 +375,7 @@ public: template <PrimitiveType primitive_type> Status extend_scan_key(ColumnValueRange<primitive_type>& range, int32_t max_scan_key_num, - bool* exact_value, bool* eos); + bool* exact_value, bool* eos, bool* should_break); Status get_key_range(std::vector<std::unique_ptr<OlapScanRange>>* key_range); @@ -993,7 +993,8 @@ bool ColumnValueRange<primitive_type>::has_intersection(ColumnValueRange<primiti template <PrimitiveType primitive_type> Status OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range, - int32_t max_scan_key_num, bool* exact_value, bool* eos) { + int32_t max_scan_key_num, bool* exact_value, bool* eos, + bool* should_break) { using CppType = typename PrimitiveTypeTraits<primitive_type>::CppType; using ConstIterator = typename std::set<CppType>::const_iterator; @@ -1017,6 +1018,7 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range, range.convert_to_range_value(); *exact_value = false; } else { + *should_break = true; return Status::OK(); } } diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index 002e085ac0a..6a5e1f8beb8 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -468,9 +468,13 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { // we use `exact_range` to identify a key range is an exact range or not when we convert // it to `_scan_keys`. If `exact_range` is true, we can just discard it from `_olap_filters`. bool exact_range = true; + + // If the `_scan_keys` cannot extend by the range of column, should stop. + bool should_break = false; + bool eos = false; - for (int column_index = 0; - column_index < column_names.size() && !_scan_keys.has_range_value() && !eos; + for (int column_index = 0; column_index < column_names.size() && + !_scan_keys.has_range_value() && !eos && !should_break; ++column_index) { auto iter = _colname_to_value_range.find(column_names[column_index]); if (_colname_to_value_range.end() == iter) { @@ -484,8 +488,9 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { // but the original range may be converted to olap filters, if it's not a exact_range. auto temp_range = range; if (range.get_fixed_value_size() <= p._max_pushdown_conditions_per_column) { - RETURN_IF_ERROR(_scan_keys.extend_scan_key( - temp_range, p._max_scan_key_num, &exact_range, &eos)); + RETURN_IF_ERROR( + _scan_keys.extend_scan_key(temp_range, p._max_scan_key_num, + &exact_range, &eos, &should_break)); if (exact_range) { _colname_to_value_range.erase(iter->first); } @@ -493,8 +498,9 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { // if exceed max_pushdown_conditions_per_column, use whole_value_rang instead // and will not erase from _colname_to_value_range, it must be not exact_range temp_range.set_whole_value_range(); - RETURN_IF_ERROR(_scan_keys.extend_scan_key( - temp_range, p._max_scan_key_num, &exact_range, &eos)); + RETURN_IF_ERROR( + _scan_keys.extend_scan_key(temp_range, p._max_scan_key_num, + &exact_range, &eos, &should_break)); } return Status::OK(); }, diff --git a/regression-test/data/correctness/test_scan_keys_with_bool_type.out b/regression-test/data/correctness/test_scan_keys_with_bool_type.out new file mode 100644 index 00000000000..d0448a74ed7 --- /dev/null +++ b/regression-test/data/correctness/test_scan_keys_with_bool_type.out @@ -0,0 +1,43 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select1 -- +-100 false -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N +-100 true -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N +-100 true 92 true 2024-02-16T04:37:37 2.34234230324234E7 \N + +-- !select2 -- +-100 false -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N +-100 true -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N + +-- !select3 -- +-100 false -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N +-100 true -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N + +-- !select3 -- +-100 0 -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N +-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N +-100 1 92 true 2024-02-16T04:37:37 2.34234230324234E7 \N +-100 2 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N + +-- !select4 -- +-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N +-100 2 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N + +-- !select5 -- +-100 0 -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N +-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N +-100 2 -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N + +-- !select6 -- +-100 a -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N +-100 b -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N +-100 b 92 true 2024-02-16T04:37:37 2.34234230324234E7 \N +-100 c 92 true 2024-02-16T04:37:37 2.34234230324234E7 \N + +-- !select7 -- +-100 a -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N +-100 b -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N + +-- !select8 -- +-100 a -82 false 2023-11-11T10:49:43 8.40968969872149E8 \N +-100 b -82 true 2024-02-16T04:37:37 -1.299962421904282E9 \N + diff --git a/regression-test/suites/correctness/test_scan_keys_with_bool_type.groovy b/regression-test/suites/correctness/test_scan_keys_with_bool_type.groovy new file mode 100644 index 00000000000..1eaa6058236 --- /dev/null +++ b/regression-test/suites/correctness/test_scan_keys_with_bool_type.groovy @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_scan_keys_with_bool_type") { + sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type """ + + sql """ + CREATE TABLE `test_scan_keys_with_bool_type` ( + `col1` tinyint NOT NULL, + `col2` boolean NOT NULL, + `col3` tinyint NOT NULL, + `col5` boolean REPLACE NOT NULL, + `col4` datetime(2) REPLACE NOT NULL, + `col6` double REPLACE_IF_NOT_NULL NULL, + `col7` datetime(3) REPLACE_IF_NOT_NULL NULL + ) ENGINE=OLAP + AGGREGATE KEY(`col1`, `col2`, `col3`) + DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + sql """ insert into test_scan_keys_with_bool_type values + ( -100 , 0 , -82 , 0 , '2023-11-11 10:49:43.00' , 840968969.872149 , NULL ), + ( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ), + ( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , NULL ); + """ + + qt_select1 " select * from test_scan_keys_with_bool_type order by 1, 2, 3, 4, 5, 6, 7; " + qt_select2 " select * from test_scan_keys_with_bool_type where col1 <= -100 and col2 in (true, false) and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; " + qt_select3 " select * from test_scan_keys_with_bool_type where col1 <= -100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; " + sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type2 """ + + sql """ + CREATE TABLE `test_scan_keys_with_bool_type2` ( + `col1` tinyint NOT NULL, + `col2` int NOT NULL, + `col3` tinyint NOT NULL, + `col5` boolean REPLACE NOT NULL, + `col4` datetime(2) REPLACE NOT NULL, + `col6` double REPLACE_IF_NOT_NULL NULL, + `col7` datetime(3) REPLACE_IF_NOT_NULL NULL + ) ENGINE=OLAP + AGGREGATE KEY(`col1`, `col2`, `col3`) + DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + sql """ insert into test_scan_keys_with_bool_type2 values + ( -100 , 0 , -82 , 0 , '2023-11-11 10:49:43.00' , 840968969.872149 , NULL ), + ( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ), + ( -100 , 2 , -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ), + ( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , NULL ); + """ + + qt_select3 " select * from test_scan_keys_with_bool_type2 order by 1, 2, 3, 4, 5, 6, 7; " + qt_select4 " select * from test_scan_keys_with_bool_type2 where col1 <= -100 and col2 in (1, 2) and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; " + qt_select5 " select * from test_scan_keys_with_bool_type2 where col1 <= -100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; " + + + sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type3 """ + + sql """ + CREATE TABLE `test_scan_keys_with_bool_type3` ( + `col1` tinyint NOT NULL, + `col2` char NOT NULL, + `col3` tinyint NOT NULL, + `col5` boolean REPLACE NOT NULL, + `col4` datetime(2) REPLACE NOT NULL, + `col6` double REPLACE_IF_NOT_NULL NULL, + `col7` datetime(3) REPLACE_IF_NOT_NULL NULL + ) ENGINE=OLAP + AGGREGATE KEY(`col1`, `col2`, `col3`) + DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + sql """ insert into test_scan_keys_with_bool_type3 values + ( -100 , 'a' , -82 , 0 , '2023-11-11 10:49:43.00' , 840968969.872149 , NULL ), + ( -100 , 'b', -82 , 1 , '2024-02-16 04:37:37.00' , -1299962421.904282 , NULL ), + ( -100 , 'b' , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , NULL ), + ( -100 , 'c' , 92 , 1 , '2024-02-16 04:37:37.00' , 23423423.0324234 , NULL ); + """ + + qt_select6 " select * from test_scan_keys_with_bool_type3 order by 1, 2, 3, 4, 5, 6, 7; " + qt_select7 " select * from test_scan_keys_with_bool_type3 where col1 <= -100 and col2 in ('a', 'b') and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; " + qt_select8 " select * from test_scan_keys_with_bool_type3 where col1 <= -100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; " +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org