This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new a420cf6a0dd [opt](inverted index) reduce generation of the 
rowid_result if not necessary #35357 (#36571)
a420cf6a0dd is described below

commit a420cf6a0dd1c18def5175d1faa78683db24fa33
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Sun Jul 7 12:07:54 2024 +0800

    [opt](inverted index) reduce generation of the rowid_result if not 
necessary #35357 (#36571)
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  7 ++
 .../data/inverted_index_p0/test_compound_1.out     | 19 +++++
 .../inverted_index_p0/test_compound_1.groovy       | 95 ++++++++++++++++++++++
 3 files changed, 121 insertions(+)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 2fabf724c9c..34fbc865b3e 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -501,6 +501,13 @@ Status 
SegmentIterator::_get_row_ranges_by_column_conditions() {
                     ++it;
                 }
             }
+            // 1. If all conditions in the compound hit the inverted index and there are no other exprs to handle,
+            // 2. then there is no need to generate index_result_column.
+            if (_enable_common_expr_pushdown && 
_remaining_conjunct_roots.empty()) {
+                for (auto& iter : _rowid_result_for_index) {
+                    iter.second.first = false;
+                }
+            }
         }
         _opts.stats->rows_inverted_index_filtered += (input_rows - 
_row_bitmap.cardinality());
     }
diff --git a/regression-test/data/inverted_index_p0/test_compound_1.out 
b/regression-test/data/inverted_index_p0/test_compound_1.out
new file mode 100644
index 00000000000..242f2d86f1a
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_compound_1.out
@@ -0,0 +1,19 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+864
+
+-- !sql --
+703
+
+-- !sql --
+161
+
+-- !sql --
+864
+
+-- !sql --
+703
+
+-- !sql --
+161
+
diff --git a/regression-test/suites/inverted_index_p0/test_compound_1.groovy 
b/regression-test/suites/inverted_index_p0/test_compound_1.groovy
new file mode 100644
index 00000000000..92dd2f78f0b
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_compound_1.groovy
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_compound_1", "p0"){
+    def indexTbName = "test_compound_1"
+
+    sql "DROP TABLE IF EXISTS ${indexTbName}"
+    sql """
+      CREATE TABLE ${indexTbName} (
+        `@timestamp` int(11) NULL COMMENT "",
+        `clientip` varchar(20) NULL COMMENT "",
+        `request` text NULL COMMENT "",
+        `status` int(11) NULL COMMENT "",
+        `size` int(11) NULL COMMENT "",
+        INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = 
"english", "support_phrase" = "true") COMMENT '',
+        INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = 
"english", "support_phrase" = "true") COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "disable_auto_compaction" = "true"
+      );
+    """
+
+    def load_httplogs_data = {table_name, label, read_flag, format_flag, 
file_name, ignore_failure=false,
+                        expected_succ_rows = -1, load_to_single_tablet = 
'true' ->
+        
+        // load the json data
+        streamLoad {
+            table "${table_name}"
+            
+            // set http request header params
+            set 'label', label + "_" + UUID.randomUUID().toString()
+            set 'read_json_by_line', read_flag
+            set 'format', format_flag
+            file file_name // import json file
+            time 10000 // limit inflight 10s
+            if (expected_succ_rows >= 0) {
+                set 'max_filter_ratio', '1'
+            }
+
+            // If a check callback is declared, the default check conditions are skipped,
+            // so the callback must verify every condition itself.
+            check { result, exception, startTime, endTime ->
+                       if (ignore_failure && expected_succ_rows < 0) { return }
+                    if (exception != null) {
+                        throw exception
+                    }
+                    log.info("Stream load result: ${result}".toString())
+                    def json = parseJson(result)
+                    assertEquals("success", json.Status.toLowerCase())
+                    if (expected_succ_rows >= 0) {
+                        assertEquals(json.NumberLoadedRows, expected_succ_rows)
+                    } else {
+                        assertEquals(json.NumberTotalRows, 
json.NumberLoadedRows + json.NumberUnselectedRows)
+                        assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes 
> 0)
+                }
+            }
+        }
+    }
+
+    try {
+        load_httplogs_data.call(indexTbName, 'test_compound_1', 'true', 
'json', 'documents-1000.json')
+
+        sql "sync"
+
+        qt_sql """ select count() from ${indexTbName} where (request 
match_phrase 'english' and clientip match_phrase '4' or request match_phrase 
'images'); """
+        qt_sql """ select count() from ${indexTbName} where (request 
match_phrase 'hm' and clientip match_phrase '3' or request match_phrase 'gif'); 
"""
+        qt_sql """ select count() from ${indexTbName} where (request 
match_phrase 'french' and clientip match_phrase '2' or request match_phrase 
'jpg'); """
+
+        qt_sql """ select /*+ SET_VAR(enable_common_expr_pushdown = false) */ 
count() from ${indexTbName} where (request match_phrase 'english' and clientip 
match_phrase '4' or request match_phrase 'images'); """
+        qt_sql """ select /*+ SET_VAR(enable_common_expr_pushdown = false) */ 
count() from ${indexTbName} where (request match_phrase 'hm' and clientip 
match_phrase '3' or request match_phrase 'gif'); """
+        qt_sql """ select /*+ SET_VAR(enable_common_expr_pushdown = false) */ 
count() from ${indexTbName} where (request match_phrase 'french' and clientip 
match_phrase '2' or request match_phrase 'jpg'); """
+
+    } finally {
+        //try_sql("DROP TABLE IF EXISTS ${indexTbName}")
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to