This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new a420cf6a0dd [opt](inverted index) reduce generation of the rowid_result if not necessary #35357 (#36571) a420cf6a0dd is described below commit a420cf6a0dd1c18def5175d1faa78683db24fa33 Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com> AuthorDate: Sun Jul 7 12:07:54 2024 +0800 [opt](inverted index) reduce generation of the rowid_result if not necessary #35357 (#36571) --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 7 ++ .../data/inverted_index_p0/test_compound_1.out | 19 +++++ .../inverted_index_p0/test_compound_1.groovy | 95 ++++++++++++++++++++++ 3 files changed, 121 insertions(+) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 2fabf724c9c..34fbc865b3e 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -501,6 +501,13 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() { ++it; } } + // 1. if all conditions in the compound hit the inverted index and there are no other expr to handle. + // 2. then there is no need to generate index_result_column. + if (_enable_common_expr_pushdown && _remaining_conjunct_roots.empty()) { + for (auto& iter : _rowid_result_for_index) { + iter.second.first = false; + } + } } _opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality()); } diff --git a/regression-test/data/inverted_index_p0/test_compound_1.out b/regression-test/data/inverted_index_p0/test_compound_1.out new file mode 100644 index 00000000000..242f2d86f1a --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_compound_1.out @@ -0,0 +1,19 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +864 + +-- !sql -- +703 + +-- !sql -- +161 + +-- !sql -- +864 + +-- !sql -- +703 + +-- !sql -- +161 + diff --git a/regression-test/suites/inverted_index_p0/test_compound_1.groovy b/regression-test/suites/inverted_index_p0/test_compound_1.groovy new file mode 100644 index 00000000000..92dd2f78f0b --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_compound_1.groovy @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_compound_1", "p0"){ + def indexTbName = "test_compound_1" + + sql "DROP TABLE IF EXISTS ${indexTbName}" + sql """ + CREATE TABLE ${indexTbName} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` varchar(20) NULL COMMENT "", + `request` text NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, + expected_succ_rows = -1, load_to_single_tablet = 'true' -> + + // load the json data + streamLoad { + table "${table_name}" + + // set http request header params + set 'label', label + "_" + UUID.randomUUID().toString() + set 'read_json_by_line', read_flag + set 'format', format_flag + file file_name // import json file + time 10000 // limit inflight 10s + if (expected_succ_rows >= 0) { + set 'max_filter_ratio', '1' + } + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (ignore_failure && expected_succ_rows < 0) { return } + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + if (expected_succ_rows >= 0) { + assertEquals(json.NumberLoadedRows, expected_succ_rows) + } else { + assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + } + + try { + load_httplogs_data.call(indexTbName, 'test_compound_1', 'true', 'json', 'documents-1000.json') + + sql "sync" + + qt_sql """ select count() from ${indexTbName} where (request match_phrase 'english' and clientip match_phrase '4' or request match_phrase 'images'); """ + qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' and clientip match_phrase '3' or request match_phrase 'gif'); """ + qt_sql """ select count() from ${indexTbName} where (request match_phrase 'french' and clientip match_phrase '2' or request match_phrase 'jpg'); """ + + qt_sql """ select /*+ SET_VAR(enable_common_expr_pushdown = false) */ count() from ${indexTbName} where (request match_phrase 'english' and clientip match_phrase '4' or request match_phrase 'images'); """ + qt_sql """ select /*+ SET_VAR(enable_common_expr_pushdown = false) */ count() from ${indexTbName} where (request match_phrase 'hm' and clientip match_phrase '3' or request match_phrase 'gif'); """ + qt_sql """ select /*+ SET_VAR(enable_common_expr_pushdown = false) */ count() from ${indexTbName} where (request match_phrase 'french' and clientip match_phrase '2' or request match_phrase 'jpg'); """ + + } finally { + //try_sql("DROP TABLE IF EXISTS ${testTable}") + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org