This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new f801c013bea [opt](inverted index) performance optimization for
need_read_data in compound #35346 (#35695)
f801c013bea is described below
commit f801c013beadece9bc47c04c58d33c55c58b2fb5
Author: Kang <[email protected]>
AuthorDate: Fri May 31 16:35:50 2024 +0800
[opt](inverted index) performance optimization for need_read_data in
compound #35346 (#35695)
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 27 +++----
.../test_need_read_data_fault_injection.out | 22 +++++
.../test_need_read_data_fault_injection.groovy | 94 ++++++++++++++++++++++
3 files changed, 129 insertions(+), 14 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 342d77ae0bb..6e7f4214959 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -826,6 +826,7 @@ Status
SegmentIterator::_apply_inverted_index_except_leafnode_of_andnode(
Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
for (auto pred : _col_preds_except_leafnode_of_andnode) {
+ auto column_id = pred->column_id();
auto pred_type = pred->type();
bool is_support = pred_type == PredicateType::EQ || pred_type ==
PredicateType::NE ||
pred_type == PredicateType::LT || pred_type ==
PredicateType::LE ||
@@ -836,6 +837,7 @@ Status
SegmentIterator::_apply_index_except_leafnode_of_andnode() {
pred_type == PredicateType::NOT_IN_LIST);
}
if (!is_support) {
+ _need_read_data_indices[column_id] = true;
continue;
}
@@ -845,16 +847,17 @@ Status
SegmentIterator::_apply_index_except_leafnode_of_andnode() {
if (can_apply_by_inverted_index) {
res = _apply_inverted_index_except_leafnode_of_andnode(pred,
&bitmap);
} else {
+ _need_read_data_indices[column_id] = true;
continue;
}
- bool need_remaining_after_evaluate =
_column_has_fulltext_index(pred->column_id()) &&
+ bool need_remaining_after_evaluate =
_column_has_fulltext_index(column_id) &&
PredicateTypeTraits::is_equal_or_list(pred_type);
if (!res.ok()) {
if (_downgrade_without_index(res, need_remaining_after_evaluate)) {
// downgrade without index query
- _not_apply_index_pred.insert(pred->column_id());
- _need_read_data_indices[pred->column_id()] = true;
+ _not_apply_index_pred.insert(column_id);
+ _need_read_data_indices[column_id] = true;
continue;
}
LOG(WARNING) << "failed to evaluate index"
@@ -866,18 +869,10 @@ Status
SegmentIterator::_apply_index_except_leafnode_of_andnode() {
std::string pred_result_sign = _gen_predicate_result_sign(pred);
_rowid_result_for_index.emplace(
std::make_pair(pred_result_sign, std::make_pair(true,
bitmap)));
- }
- for (auto pred : _col_preds_except_leafnode_of_andnode) {
- auto column_name = _schema->column(pred->column_id())->name();
- if (!_remaining_conjunct_roots.empty() &&
- _check_column_pred_all_push_down(column_name, true,
- pred->type() ==
PredicateType::MATCH) &&
- !pred->predicate_params()->marked_by_runtime_filter) {
- // if column's need_read_data already set true, we can not set it
to false now.
- if (_need_read_data_indices.find(pred->column_id()) ==
_need_read_data_indices.end()) {
- _need_read_data_indices[pred->column_id()] = false;
- }
+ if (!pred->predicate_params()->marked_by_runtime_filter &&
+ !_need_read_data_indices.contains(column_id)) {
+ _need_read_data_indices[column_id] = false;
}
}
@@ -1806,6 +1801,10 @@ Status SegmentIterator::_read_columns_by_index(uint32_t
nrows_read_limit, uint32
continue;
}
+ DBUG_EXECUTE_IF("segment_iterator._read_columns_by_index", {
+ return Status::Error<ErrorCode::INTERNAL_ERROR>("{} does not need
to read data");
+ })
+
if (is_continuous) {
size_t rows_read = nrows_read;
_opts.stats->block_first_read_seek_num += 1;
diff --git
a/regression-test/data/fault_injection_p0/test_need_read_data_fault_injection.out
b/regression-test/data/fault_injection_p0/test_need_read_data_fault_injection.out
new file mode 100644
index 00000000000..37885e404d3
--- /dev/null
+++
b/regression-test/data/fault_injection_p0/test_need_read_data_fault_injection.out
@@ -0,0 +1,22 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+863
+
+-- !sql --
+210
+
+-- !sql --
+0
+
+-- !sql --
+819
+
+-- !sql --
+199
+
+-- !sql --
+713
+
+-- !sql --
+18
+
diff --git
a/regression-test/suites/fault_injection_p0/test_need_read_data_fault_injection.groovy
b/regression-test/suites/fault_injection_p0/test_need_read_data_fault_injection.groovy
new file mode 100644
index 00000000000..c0763cb6bae
--- /dev/null
+++
b/regression-test/suites/fault_injection_p0/test_need_read_data_fault_injection.groovy
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_need_read_data_fault_injection", "nonConcurrent") {
+ // define a sql table
+ def indexTbName = "test_need_read_data_fault_injection"
+
+ sql "DROP TABLE IF EXISTS ${indexTbName}"
+ sql """
+ CREATE TABLE ${indexTbName} (
+ `@timestamp` int(11) NULL COMMENT "",
+ `clientip` varchar(20) NULL COMMENT "",
+ `request` text NULL COMMENT "",
+ `status` int(11) NULL COMMENT "",
+ `size` int(11) NULL COMMENT "",
+ INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT '',
+ INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"english", "support_phrase" = "true") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`@timestamp`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ );
+ """
+
+ def load_httplogs_data = {table_name, label, read_flag, format_flag,
file_name, ignore_failure=false,
+ expected_succ_rows = -1, load_to_single_tablet =
'true' ->
+
+ // load the json data
+ streamLoad {
+ table "${table_name}"
+
+ // set http request header params
+ set 'label', label + "_" + UUID.randomUUID().toString()
+ set 'read_json_by_line', read_flag
+ set 'format', format_flag
+ file file_name // import json file
+ time 10000 // limit inflight 10s
+ if (expected_succ_rows >= 0) {
+ set 'max_filter_ratio', '1'
+ }
+
+ // if declared a check callback, the default check condition will
ignore.
+ // So you must check all condition
+ check { result, exception, startTime, endTime ->
+ if (ignore_failure && expected_succ_rows < 0) { return }
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ }
+ }
+ }
+
+ try {
+ load_httplogs_data.call(indexTbName,
'test_need_read_data_fault_injection', 'true', 'json', 'documents-1000.json')
+
+ sql "sync"
+
+ try {
+
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
+
+ qt_sql """ select count() from ${indexTbName} where (request
match_phrase 'hm' or request match_phrase 'jpg' or request match_phrase 'gif');
"""
+ qt_sql """ select count() from ${indexTbName} where (request
match_phrase 'hm' or request match_phrase 'jpg' and request match_phrase
'gif'); """
+ qt_sql """ select count() from ${indexTbName} where (request
match_phrase 'hm' and request match_phrase 'jpg' and request match_phrase
'gif'); """
+ qt_sql """ select count() from ${indexTbName} where (request
match_phrase 'hm' and request match_phrase 'jpg' or request match_phrase
'gif'); """
+
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'1' or request match 'jpg' or clientip match '2'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'3' or request match 'gif' or clientip match '4'); """
+ qt_sql """ select count() from ${indexTbName} where (clientip match
'images' or clientip match '5' or clientip match 'english'); """
+
+ } finally {
+
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
+ }
+ } finally {
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]