This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 4b0bee68d53 [fix](scan) Incorrect scan keys lead to wrong query 
results. (#40814)
4b0bee68d53 is described below

commit 4b0bee68d53111c090a9a04a4a633d9f7fc2e96b
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Thu Sep 19 13:57:14 2024 +0800

    [fix](scan) Incorrect scan keys lead to wrong query results. (#40814)
    
    ## Proposed changes
    
    ```
    mysql [doris_14555]>select * from table_9436528_3;
    
+------+------+------+------+------------------------+--------------------+------+
    | col1 | col2 | col3 | col5 | col4                   | col6               | 
col7 |
    
+------+------+------+------+------------------------+--------------------+------+
    | -100 |    1 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.904282 | 
NULL |
    | -100 |    1 |   92 |    1 | 2024-02-16 04:37:37.00 |   23423423.0324234 | 
NULL |
    | -100 |    0 |  -82 |    0 | 2023-11-11 10:49:43.00 |   840968969.872149 | 
NULL |
    ```
    wrong result:
    ```
    mysql [doris_14555]>select * from table_9436528_3 where col1 <= -100 and 
col2 in (true, false) and col3 = -82;
    
+------+------+------+------+------------------------+--------------------+------+
    | col1 | col2 | col3 | col5 | col4                   | col6               | 
col7 |
    
+------+------+------+------+------------------------+--------------------+------+
    | -100 |    1 |  -82 |    1 | 2024-02-16 04:37:37.00 | -1299962421.904282 | 
NULL |
    | -100 |    1 |   92 |    1 | 2024-02-16 04:37:37.00 |   23423423.0324234 | 
NULL |
    
+------+------+------+------+------------------------+--------------------+------+
    ```
---
 be/src/exec/olap_common.h                          |   6 +-
 be/src/pipeline/exec/olap_scan_operator.cpp        |  18 ++--
 .../correctness/test_scan_keys_with_bool_type.out  |  43 ++++++++
 .../test_scan_keys_with_bool_type.groovy           | 110 +++++++++++++++++++++
 4 files changed, 169 insertions(+), 8 deletions(-)

diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index 9ba1b81d1b8..a4180938dfc 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -375,7 +375,7 @@ public:
 
     template <PrimitiveType primitive_type>
     Status extend_scan_key(ColumnValueRange<primitive_type>& range, int32_t 
max_scan_key_num,
-                           bool* exact_value, bool* eos);
+                           bool* exact_value, bool* eos, bool* should_break);
 
     Status get_key_range(std::vector<std::unique_ptr<OlapScanRange>>* 
key_range);
 
@@ -993,7 +993,8 @@ bool 
ColumnValueRange<primitive_type>::has_intersection(ColumnValueRange<primiti
 
 template <PrimitiveType primitive_type>
 Status OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range,
-                                     int32_t max_scan_key_num, bool* 
exact_value, bool* eos) {
+                                     int32_t max_scan_key_num, bool* 
exact_value, bool* eos,
+                                     bool* should_break) {
     using CppType = typename PrimitiveTypeTraits<primitive_type>::CppType;
     using ConstIterator = typename std::set<CppType>::const_iterator;
 
@@ -1017,6 +1018,7 @@ Status 
OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range,
                 range.convert_to_range_value();
                 *exact_value = false;
             } else {
+                *should_break = true;
                 return Status::OK();
             }
         }
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp 
b/be/src/pipeline/exec/olap_scan_operator.cpp
index 002e085ac0a..6a5e1f8beb8 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -468,9 +468,13 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() 
{
         // we use `exact_range` to identify a key range is an exact range or 
not when we convert
         // it to `_scan_keys`. If `exact_range` is true, we can just discard 
it from `_olap_filters`.
         bool exact_range = true;
+
+        // If the `_scan_keys` cannot extend by the range of column, should 
stop.
+        bool should_break = false;
+
         bool eos = false;
-        for (int column_index = 0;
-             column_index < column_names.size() && 
!_scan_keys.has_range_value() && !eos;
+        for (int column_index = 0; column_index < column_names.size() &&
+                                   !_scan_keys.has_range_value() && !eos && 
!should_break;
              ++column_index) {
             auto iter = 
_colname_to_value_range.find(column_names[column_index]);
             if (_colname_to_value_range.end() == iter) {
@@ -484,8 +488,9 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() {
                         // but the original range may be converted to olap 
filters, if it's not a exact_range.
                         auto temp_range = range;
                         if (range.get_fixed_value_size() <= 
p._max_pushdown_conditions_per_column) {
-                            RETURN_IF_ERROR(_scan_keys.extend_scan_key(
-                                    temp_range, p._max_scan_key_num, 
&exact_range, &eos));
+                            RETURN_IF_ERROR(
+                                    _scan_keys.extend_scan_key(temp_range, 
p._max_scan_key_num,
+                                                               &exact_range, 
&eos, &should_break));
                             if (exact_range) {
                                 _colname_to_value_range.erase(iter->first);
                             }
@@ -493,8 +498,9 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() {
                             // if exceed max_pushdown_conditions_per_column, 
use whole_value_rang instead
                             // and will not erase from 
_colname_to_value_range, it must be not exact_range
                             temp_range.set_whole_value_range();
-                            RETURN_IF_ERROR(_scan_keys.extend_scan_key(
-                                    temp_range, p._max_scan_key_num, 
&exact_range, &eos));
+                            RETURN_IF_ERROR(
+                                    _scan_keys.extend_scan_key(temp_range, 
p._max_scan_key_num,
+                                                               &exact_range, 
&eos, &should_break));
                         }
                         return Status::OK();
                     },
diff --git a/regression-test/data/correctness/test_scan_keys_with_bool_type.out 
b/regression-test/data/correctness/test_scan_keys_with_bool_type.out
new file mode 100644
index 00000000000..d0448a74ed7
--- /dev/null
+++ b/regression-test/data/correctness/test_scan_keys_with_bool_type.out
@@ -0,0 +1,43 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select1 --
+-100   false   -82     false   2023-11-11T10:49:43     8.40968969872149E8      
\N
+-100   true    -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+-100   true    92      true    2024-02-16T04:37:37     2.34234230324234E7      
\N
+
+-- !select2 --
+-100   false   -82     false   2023-11-11T10:49:43     8.40968969872149E8      
\N
+-100   true    -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+
+-- !select3 --
+-100   false   -82     false   2023-11-11T10:49:43     8.40968969872149E8      
\N
+-100   true    -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+
+-- !select3 --
+-100   0       -82     false   2023-11-11T10:49:43     8.40968969872149E8      
\N
+-100   1       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+-100   1       92      true    2024-02-16T04:37:37     2.34234230324234E7      
\N
+-100   2       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+
+-- !select4 --
+-100   1       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+-100   2       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+
+-- !select5 --
+-100   0       -82     false   2023-11-11T10:49:43     8.40968969872149E8      
\N
+-100   1       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+-100   2       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+
+-- !select6 --
+-100   a       -82     false   2023-11-11T10:49:43     8.40968969872149E8      
\N
+-100   b       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+-100   b       92      true    2024-02-16T04:37:37     2.34234230324234E7      
\N
+-100   c       92      true    2024-02-16T04:37:37     2.34234230324234E7      
\N
+
+-- !select7 --
+-100   a       -82     false   2023-11-11T10:49:43     8.40968969872149E8      
\N
+-100   b       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+
+-- !select8 --
+-100   a       -82     false   2023-11-11T10:49:43     8.40968969872149E8      
\N
+-100   b       -82     true    2024-02-16T04:37:37     -1.299962421904282E9    
\N
+
diff --git 
a/regression-test/suites/correctness/test_scan_keys_with_bool_type.groovy 
b/regression-test/suites/correctness/test_scan_keys_with_bool_type.groovy
new file mode 100644
index 00000000000..1eaa6058236
--- /dev/null
+++ b/regression-test/suites/correctness/test_scan_keys_with_bool_type.groovy
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_scan_keys_with_bool_type") {
+    sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type """
+
+    sql """ 
+        CREATE TABLE `test_scan_keys_with_bool_type` (
+            `col1` tinyint NOT NULL,
+            `col2` boolean NOT NULL,
+            `col3` tinyint NOT NULL,
+            `col5` boolean REPLACE NOT NULL,
+            `col4` datetime(2) REPLACE NOT NULL,
+            `col6` double REPLACE_IF_NOT_NULL NULL,
+            `col7` datetime(3) REPLACE_IF_NOT_NULL NULL
+        ) ENGINE=OLAP
+        AGGREGATE KEY(`col1`, `col2`, `col3`)
+        DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "disable_auto_compaction" = "true"
+        ); 
+    """
+
+    sql """ insert into test_scan_keys_with_bool_type values
+        ( -100 ,    0 ,  -82 ,    0 , '2023-11-11 10:49:43.00' ,   
840968969.872149 , NULL ),
+        ( -100 ,    1 ,  -82 ,    1 , '2024-02-16 04:37:37.00' , 
-1299962421.904282 , NULL ),
+        ( -100 ,    1 ,   92 ,    1 , '2024-02-16 04:37:37.00' ,   
23423423.0324234 , NULL );
+    """
+
+    qt_select1 " select * from test_scan_keys_with_bool_type order by 1, 2, 3, 
4, 5, 6, 7; "
+    qt_select2 " select * from test_scan_keys_with_bool_type where col1 <= 
-100 and col2 in (true, false) and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+    qt_select3 " select * from test_scan_keys_with_bool_type where col1 <= 
-100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+    sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type2 """
+
+    sql """ 
+        CREATE TABLE `test_scan_keys_with_bool_type2` (
+            `col1` tinyint NOT NULL,
+            `col2` int NOT NULL,
+            `col3` tinyint NOT NULL,
+            `col5` boolean REPLACE NOT NULL,
+            `col4` datetime(2) REPLACE NOT NULL,
+            `col6` double REPLACE_IF_NOT_NULL NULL,
+            `col7` datetime(3) REPLACE_IF_NOT_NULL NULL
+        ) ENGINE=OLAP
+        AGGREGATE KEY(`col1`, `col2`, `col3`)
+        DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "disable_auto_compaction" = "true"
+        ); 
+    """
+
+    sql """ insert into test_scan_keys_with_bool_type2 values
+        ( -100 ,    0 ,  -82 ,    0 , '2023-11-11 10:49:43.00' ,   
840968969.872149 , NULL ),
+        ( -100 ,    1 ,  -82 ,    1 , '2024-02-16 04:37:37.00' , 
-1299962421.904282 , NULL ),
+        ( -100 ,    2 ,  -82 ,    1 , '2024-02-16 04:37:37.00' , 
-1299962421.904282 , NULL ),
+        ( -100 ,    1 ,   92 ,    1 , '2024-02-16 04:37:37.00' ,   
23423423.0324234 , NULL );
+    """
+
+    qt_select3 " select * from test_scan_keys_with_bool_type2 order by 1, 2, 
3, 4, 5, 6, 7; "
+    qt_select4 " select * from test_scan_keys_with_bool_type2 where col1 <= 
-100 and col2 in (1, 2) and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+    qt_select5 " select * from test_scan_keys_with_bool_type2 where col1 <= 
-100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+
+
+    sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type3 """
+
+    sql """ 
+        CREATE TABLE `test_scan_keys_with_bool_type3` (
+            `col1` tinyint NOT NULL,
+            `col2` char NOT NULL,
+            `col3` tinyint NOT NULL,
+            `col5` boolean REPLACE NOT NULL,
+            `col4` datetime(2) REPLACE NOT NULL,
+            `col6` double REPLACE_IF_NOT_NULL NULL,
+            `col7` datetime(3) REPLACE_IF_NOT_NULL NULL
+        ) ENGINE=OLAP
+        AGGREGATE KEY(`col1`, `col2`, `col3`)
+        DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "disable_auto_compaction" = "true"
+        ); 
+    """
+
+    sql """ insert into test_scan_keys_with_bool_type3 values
+        ( -100 ,    'a' ,  -82 ,    0 , '2023-11-11 10:49:43.00' ,   
840968969.872149 , NULL ),
+        ( -100 ,    'b',  -82 ,    1 , '2024-02-16 04:37:37.00' , 
-1299962421.904282 , NULL ),
+        ( -100 ,    'b' ,   92 ,    1 , '2024-02-16 04:37:37.00' ,   
23423423.0324234 , NULL ),
+        ( -100 ,    'c' ,   92 ,    1 , '2024-02-16 04:37:37.00' ,   
23423423.0324234 , NULL );
+    """
+
+    qt_select6 " select * from test_scan_keys_with_bool_type3 order by 1, 2, 
3, 4, 5, 6, 7; "
+    qt_select7 " select * from test_scan_keys_with_bool_type3 where col1 <= 
-100 and col2 in ('a', 'b') and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+    qt_select8 " select * from test_scan_keys_with_bool_type3 where col1 <= 
-100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to