This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 21e5374f5a2 [fix](inverted index) Error parsing NULL_LITERAL (#37842)
21e5374f5a2 is described below
commit 21e5374f5a277c5ec9d05cb3727e95a612661e16
Author: zzzxl <[email protected]>
AuthorDate: Tue Jul 16 11:19:22 2024 +0800
[fix](inverted index) Error parsing NULL_LITERAL (#37842)
## Proposed changes
1. NULL_LITERAL and other LITERAL will enter the else process
incorrectly
2. SELECT * FROM table_name WHERE column_name NOT IN (1, 2, NULL); The
NOT IN operator returns an empty result set when the collection contains
NULL.
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 6 +-
be/src/pipeline/exec/scan_operator.cpp | 19 ++-
.../test_index_inlist_fault_injection.out | 36 +++++
.../test_index_rqg_bug4.out} | 31 ++--
.../test_index_inlist_fault_injection.groovy | 23 +++
.../inverted_index_p0/test_index_rqg_bug2.groovy | 2 +-
.../inverted_index_p0/test_index_rqg_bug4.groovy | 162 +++++++++++++++++++++
7 files changed, 250 insertions(+), 29 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 6a66ae6fc14..a0b893bb24b 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -753,6 +753,7 @@ bool SegmentIterator::_is_literal_node(const
TExprNodeType::type& node_type) {
case TExprNodeType::DECIMAL_LITERAL:
case TExprNodeType::STRING_LITERAL:
case TExprNodeType::DATE_LITERAL:
+ case TExprNodeType::NULL_LITERAL:
return true;
default:
return false;
@@ -2811,7 +2812,8 @@ void
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
} else if (_is_literal_node(node_type)) {
auto v_literal_expr = static_cast<const
doris::vectorized::VLiteral*>(expr.get());
_column_predicate_info->query_values.insert(v_literal_expr->value());
- } else {
+ } else if (node_type == TExprNodeType::BINARY_PRED || node_type ==
TExprNodeType::MATCH_PRED ||
+ node_type == TExprNodeType::IN_PRED) {
if (node_type == TExprNodeType::MATCH_PRED) {
_column_predicate_info->query_op = "match";
} else if (node_type == TExprNodeType::IN_PRED) {
@@ -2820,7 +2822,7 @@ void
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
} else {
_column_predicate_info->query_op = "not_in";
}
- } else if (node_type != TExprNodeType::COMPOUND_PRED) {
+ } else {
_column_predicate_info->query_op = expr->fn().name.function_name;
}
diff --git a/be/src/pipeline/exec/scan_operator.cpp
b/be/src/pipeline/exec/scan_operator.cpp
index a72514fd0b1..c3fd0457d5e 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -1100,18 +1100,12 @@ Status
ScanLocalState<Derived>::_normalize_in_and_not_in_compound_predicate(
auto hybrid_set = expr->get_set_func();
if (hybrid_set != nullptr) {
- if (hybrid_set->size() <=
- _parent->cast<typename
Derived::Parent>()._max_pushdown_conditions_per_column) {
- iter = hybrid_set->begin();
- } else {
- _filter_predicates.in_filters.emplace_back(slot->col_name(),
expr->get_set_func());
- *pdt = PushDownType::ACCEPTABLE;
- return Status::OK();
- }
+ *pdt = PushDownType::UNACCEPTABLE;
+ return Status::OK();
} else {
- vectorized::VInPredicate* pred =
static_cast<vectorized::VInPredicate*>(expr);
+ auto* pred = static_cast<vectorized::VInPredicate*>(expr);
- vectorized::InState* state =
reinterpret_cast<vectorized::InState*>(
+ auto* state = reinterpret_cast<vectorized::InState*>(
expr_ctx->fn_context(pred->fn_context_index())
->get_function_state(FunctionContext::FRAGMENT_LOCAL));
@@ -1120,6 +1114,11 @@ Status
ScanLocalState<Derived>::_normalize_in_and_not_in_compound_predicate(
}
iter = state->hybrid_set->begin();
+
+ if (state->hybrid_set->contain_null()) {
+ *pdt = PushDownType::UNACCEPTABLE;
+ return Status::OK();
+ }
}
while (iter->has_next()) {
diff --git
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
index 9fbd1c8e252..7e9336a12c9 100644
---
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
+++
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
@@ -17,3 +17,39 @@
-- !sql --
208
+-- !sql --
+30
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+34
+
+-- !sql --
+2
+
+-- !sql --
+2
+
+-- !sql --
+30
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+34
+
+-- !sql --
+2
+
+-- !sql --
+2
+
diff --git
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
similarity index 57%
copy from
regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
copy to regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
index 9fbd1c8e252..784795cbbc1 100644
---
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
+++ b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
@@ -1,19 +1,18 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !sql --
-8
-
--- !sql --
-996
-
--- !sql --
-210
-
--- !sql --
-8
-
--- !sql --
-998
-
--- !sql --
-208
+\N
+a
+b
+f
+h
+i
+j
+k
+l
+o
+p
+q
+v
+y
+z
diff --git
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
index 52f9ed5c656..57ec94d9f99 100644
---
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
+++
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
@@ -90,6 +90,29 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") {
} finally {
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
}
+
+ try {
+ sql """ set enable_common_expr_pushdown = true; """
+
+ qt_sql """ select count() from ${indexTbName} where (clientip in
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+ qt_sql """ select count() from ${indexTbName} where (clientip not in
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') and clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') or clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') or clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') and clientip match '120.1.0.0'); """
+
+ sql """ set enable_common_expr_pushdown = false; """
+
+ qt_sql """ select count() from ${indexTbName} where (clientip in
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+ qt_sql """ select count() from ${indexTbName} where (clientip not in
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') and clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') or clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') or clientip match '120.1.0.0'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0',
'247.37.0.0') and clientip match '120.1.0.0'); """
+
+ sql """ set enable_common_expr_pushdown = true; """
+ } finally {
+ }
} finally {
}
}
\ No newline at end of file
diff --git
a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
index bddbfdc4ab0..faede0bc3c1 100644
--- a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
@@ -14,7 +14,7 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-suite("test_index_rqg_bug2", "test_index_rqg_bug2"){
+suite("test_index_rqg_bug2", "test_index_rqg_bug"){
def table1 = "test_index_rqg_bug2"
sql "drop table if exists ${table1}"
diff --git
a/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
new file mode 100644
index 00000000000..9353616f95f
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("test_index_rqg_bug4", "test_index_rqg_bug"){
+ def table1 = "test_index_rqg_bug4_table1"
+ def table2 = "test_index_rqg_bug4_table2"
+
+ sql "drop table if exists ${table1}"
+ sql "drop table if exists ${table2}"
+
+ sql """
+ create table ${table1} (
+ col_date_undef_signed_not_null_index_inverted date not null ,
+ col_bigint_undef_signed_not_null_index_inverted bigint not null ,
+ col_bigint_undef_signed_not_null bigint not null ,
+ col_int_undef_signed int null ,
+ col_int_undef_signed_index_inverted int null ,
+ col_int_undef_signed_not_null int not null ,
+ col_int_undef_signed_not_null_index_inverted int not null ,
+ col_bigint_undef_signed bigint null ,
+ col_bigint_undef_signed_index_inverted bigint null ,
+ col_date_undef_signed date null ,
+ col_date_undef_signed_index_inverted date null ,
+ col_date_undef_signed_not_null date not null ,
+ col_varchar_10__undef_signed varchar(10) null ,
+ col_varchar_10__undef_signed_index_inverted varchar(10) null ,
+ col_varchar_10__undef_signed_not_null varchar(10) not null ,
+ col_varchar_10__undef_signed_not_null_index_inverted varchar(10) not
null ,
+ col_varchar_1024__undef_signed varchar(1024) null ,
+ col_varchar_1024__undef_signed_index_inverted varchar(1024) null ,
+ col_varchar_1024__undef_signed_not_null varchar(1024) not null ,
+ col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)
not null ,
+ pk int,
+ INDEX col_int_undef_signed_index_inverted_idx
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_int_undef_signed_not_null_index_inverted_idx
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_index_inverted_idx
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_not_null_index_inverted_idx
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_index_inverted_idx
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_not_null_index_inverted_idx
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_index_inverted_idx
(`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx
(`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_index_inverted_idx
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+ ) engine=olap
+ UNIQUE KEY(col_date_undef_signed_not_null_index_inverted,
col_bigint_undef_signed_not_null_index_inverted,
col_bigint_undef_signed_not_null)
+ PARTITION BY
RANGE(col_date_undef_signed_not_null_index_inverted) (
+ FROM ('2023-12-09') TO ('2024-03-09') INTERVAL 1 DAY,
+ FROM ('2025-02-16') TO ('2025-03-09') INTERVAL 1 DAY,
+ FROM ('2025-06-18') TO ('2025-06-20') INTERVAL 1 DAY,
+ FROM ('2026-01-01') TO ('2026-03-09') INTERVAL 1 DAY,
+ FROM ('2027-01-01') TO ('2027-02-09') INTERVAL 1 DAY
+ )
+ distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+ properties("enable_unique_key_merge_on_write" = "true",
"replication_num" = "1");
+ """
+
+ sql """
+ create table ${table2} (
+ col_date_undef_signed_not_null date not null ,
+ col_bigint_undef_signed_not_null_index_inverted bigint not null ,
+ col_bigint_undef_signed_not_null bigint not null ,
+ col_int_undef_signed int null ,
+ col_int_undef_signed_index_inverted int null ,
+ col_int_undef_signed_not_null int not null ,
+ col_int_undef_signed_not_null_index_inverted int not null ,
+ col_bigint_undef_signed bigint null ,
+ col_bigint_undef_signed_index_inverted bigint null ,
+ col_date_undef_signed date null ,
+ col_date_undef_signed_index_inverted date null ,
+ col_date_undef_signed_not_null_index_inverted date not null ,
+ col_varchar_10__undef_signed varchar(10) null ,
+ col_varchar_10__undef_signed_index_inverted varchar(10) null ,
+ col_varchar_10__undef_signed_not_null varchar(10) not null ,
+ col_varchar_10__undef_signed_not_null_index_inverted varchar(10) not
null ,
+ col_varchar_1024__undef_signed varchar(1024) null ,
+ col_varchar_1024__undef_signed_index_inverted varchar(1024) null ,
+ col_varchar_1024__undef_signed_not_null varchar(1024) not null ,
+ col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)
not null ,
+ pk int,
+ INDEX col_int_undef_signed_index_inverted_idx
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_int_undef_signed_not_null_index_inverted_idx
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_index_inverted_idx
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_bigint_undef_signed_not_null_index_inverted_idx
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_index_inverted_idx
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_date_undef_signed_not_null_index_inverted_idx
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_index_inverted_idx
(`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx
(`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_index_inverted_idx
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+ INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+ ) engine=olap
+ UNIQUE KEY(col_date_undef_signed_not_null,
col_bigint_undef_signed_not_null_index_inverted,
col_bigint_undef_signed_not_null)
+ PARTITION BY RANGE(col_date_undef_signed_not_null) (
+ PARTITION p0 VALUES LESS THAN ('2023-12-11'),
+ PARTITION p1 VALUES LESS THAN ('2023-12-15'),
+ PARTITION p2 VALUES LESS THAN ('2023-12-16'),
+ PARTITION p3 VALUES LESS THAN ('2023-12-25'),
+ PARTITION p4 VALUES LESS THAN ('2024-01-18'),
+ PARTITION p5 VALUES LESS THAN ('2026-02-18'),
+ PARTITION p6 VALUES LESS THAN ('5024-02-18'),
+ PARTITION p100 VALUES LESS THAN ('9999-12-31')
+ )
+ distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+ properties("enable_unique_key_merge_on_write" = "true",
"replication_num" = "1");
+ """
+
+ sql """
+ insert into
${table1}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde
[...]
+ """
+
+ sql """
+ insert into
${table2}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde
[...]
+ """
+
+ try {
+ sql "sync"
+
+ qt_sql """
+ select
+ table1.col_varchar_1024__undef_signed_not_null as field1
+ from
+ ${table1} as table1
+ right join ${table2} as table2 on (
+ table2.col_date_undef_signed_index_inverted =
table1.col_date_undef_signed_index_inverted
+ )
+ where
+ not (
+ (
+ table2.`col_date_undef_signed_not_null_index_inverted` in (
+ '2027-01-16',
+ '2023-12-17',
+ '2024-02-18',
+ null,
+ '2000-10-18',
+ '2023-12-14',
+ '2023-12-18'
+ )
+ )
+ and table2.`col_date_undef_signed_not_null_index_inverted` <
'2025-06-18'
+ )
+ group by
+ field1
+ order by
+ field1
+ limit
+ 10000;
+ """
+ } finally {
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]