This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 6495eb28d81 [fix](index compaction)support compact multi segments in 
one index #28889 (#29276)
6495eb28d81 is described below

commit 6495eb28d8118f8266a5212ca31e1af80e51e320
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Fri Dec 29 16:10:26 2023 +0800

    [fix](index compaction)support compact multi segments in one index #28889 
(#29276)
---
 .gitmodules                                        |   2 +-
 be/src/clucene                                     |   2 +-
 be/src/common/config.cpp                           |   5 +-
 be/src/common/config.h                             |   3 +-
 be/src/olap/compaction.cpp                         |  38 ++-
 .../segment_v2/inverted_index_compaction.cpp       |   1 +
 .../rowset/segment_v2/inverted_index_writer.cpp    |   3 +-
 build.sh                                           |   2 +-
 ..._index_compaction_with_multi_index_segments.out | 214 +++++++++++++++
 ...dex_compaction_with_multi_index_segments.groovy | 286 +++++++++++++++++++++
 10 files changed, 544 insertions(+), 12 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 9fe51bfd1d0..729354ec9c7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -32,4 +32,4 @@
 [submodule "be/src/clucene"]
        path = be/src/clucene
        url = https://github.com/apache/doris-thirdparty.git
-       branch = clucene
+       branch = clucene-2.0
diff --git a/be/src/clucene b/be/src/clucene
index ed92e181310..c9030853082 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit ed92e1813103a513aa0ee16730b94cc840daec73
+Subproject commit c90308530828a24fe421a9e19bc1e5e06f1460cd
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index ee7df5d96db..af08d2deaab 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -996,6 +996,9 @@ DEFINE_String(inverted_index_query_cache_limit, "10%");
 
 // inverted index
 DEFINE_mDouble(inverted_index_ram_buffer_size, "512");
+// -1 indicates not working.
+// Normally we should not change this, it's useful for testing.
+DEFINE_mInt32(inverted_index_max_buffered_docs, "-1");
 DEFINE_Int32(query_bkd_inverted_index_limit_percent, "5"); // 5%
 // dict path for chinese analyzer
 DEFINE_String(inverted_index_dict_path, "${DORIS_HOME}/dict");
@@ -1003,7 +1006,7 @@ DEFINE_Int32(inverted_index_read_buffer_size, "4096");
 // tree depth for bkd index
 DEFINE_Int32(max_depth_in_bkd_tree, "32");
 // index compaction
-DEFINE_Bool(inverted_index_compaction_enable, "false");
+DEFINE_mBool(inverted_index_compaction_enable, "false");
 // use num_broadcast_buffer blocks as buffer to do broadcast
 DEFINE_Int32(num_broadcast_buffer, "32");
 // semi-structure configs
diff --git a/be/src/common/config.h b/be/src/common/config.h
index c865c3b72b2..4fc3bc8dbfa 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1037,10 +1037,11 @@ DECLARE_Int32(query_bkd_inverted_index_limit_percent); 
// 5%
 // dict path for chinese analyzer
 DECLARE_String(inverted_index_dict_path);
 DECLARE_Int32(inverted_index_read_buffer_size);
+DECLARE_mInt32(inverted_index_max_buffered_docs);
 // tree depth for bkd index
 DECLARE_Int32(max_depth_in_bkd_tree);
 // index compaction
-DECLARE_Bool(inverted_index_compaction_enable);
+DECLARE_mBool(inverted_index_compaction_enable);
 // use num_broadcast_buffer blocks as buffer to do broadcast
 DECLARE_Int32(num_broadcast_buffer);
 // semi-structure configs
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 761e50db73e..f6c8b3bb5bf 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -403,6 +403,34 @@ Status Compaction::do_compaction_impl(int64_t permits) {
 
     if (_input_row_num > 0 && stats.rowid_conversion && 
config::inverted_index_compaction_enable) {
         OlapStopWatch inverted_watch;
+
+        // check rowid_conversion correctness
+        Version version = _tablet->max_version();
+        DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
+        std::set<RowLocation> missed_rows;
+        std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, 
RowLocation>>> location_map;
+        // Convert the delete bitmap of the input rowsets to output rowset.
+        std::size_t missed_rows_size = 0;
+        _tablet->calc_compaction_output_rowset_delete_bitmap(
+                _input_rowsets, _rowid_conversion, 0, version.second + 1, 
&missed_rows,
+                &location_map, _tablet->tablet_meta()->delete_bitmap(),
+                &output_rowset_delete_bitmap);
+        if (!allow_delete_in_cumu_compaction()) {
+            missed_rows_size = missed_rows.size();
+            if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION 
&&
+                stats.merged_rows != missed_rows_size) {
+                std::string err_msg = fmt::format(
+                        "cumulative compaction: the merged rows({}) is not 
equal to missed "
+                        "rows({}) in rowid conversion, tablet_id: {}, 
table_id:{}",
+                        stats.merged_rows, missed_rows_size, 
_tablet->tablet_id(),
+                        _tablet->table_id());
+                DCHECK(false) << err_msg;
+                LOG(WARNING) << err_msg;
+            }
+        }
+
+        RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, 
location_map));
+
         // translation vec
         // <<dest_idx_num, dest_docId>>
         // the first level vector: index indicates src segment.
@@ -428,7 +456,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
             // src index files
             // format: rowsetId_segmentId
             std::vector<std::string> src_index_files(src_segment_num);
-            for (auto m : src_seg_to_id_map) {
+            for (const auto& m : src_seg_to_id_map) {
                 std::pair<RowsetId, uint32_t> p = m.first;
                 src_index_files[m.second] = p.first.to_string() + "_" + 
std::to_string(p.second);
             }
@@ -677,11 +705,11 @@ Status Compaction::modify_rowsets(const 
Merger::Statistics* stats) {
         // of incremental data later.
         // TODO(LiaoXin): check if there are duplicate keys
         std::size_t missed_rows_size = 0;
+        _tablet->calc_compaction_output_rowset_delete_bitmap(
+                _input_rowsets, _rowid_conversion, 0, version.second + 1, 
&missed_rows,
+                &location_map, _tablet->tablet_meta()->delete_bitmap(),
+                &output_rowset_delete_bitmap);
         if (!allow_delete_in_cumu_compaction()) {
-            _tablet->calc_compaction_output_rowset_delete_bitmap(
-                    _input_rowsets, _rowid_conversion, 0, version.second + 1, 
&missed_rows,
-                    &location_map, _tablet->tablet_meta()->delete_bitmap(),
-                    &output_rowset_delete_bitmap);
             missed_rows_size = missed_rows.size();
             if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION 
&& stats != nullptr &&
                 stats->merged_rows != missed_rows_size) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
index 7f653a93591..b3a28c6ebfc 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
@@ -56,6 +56,7 @@ Status compact_column(int32_t index_id, int src_segment_num, 
int dest_segment_nu
         dest_index_dirs[i] = DorisCompoundDirectory::getDirectory(fs, 
path.c_str(), true);
     }
 
+    DCHECK_EQ(src_index_dirs.size(), trans_vec.size());
     index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec,
                                   dest_segment_num_rows);
 
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 0949d708742..744710d9082 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -60,7 +60,6 @@
 
 namespace doris::segment_v2 {
 const int32_t MAX_FIELD_LEN = 0x7FFFFFFFL;
-const int32_t MAX_BUFFER_DOCS = 100000000;
 const int32_t MERGE_FACTOR = 100000000;
 const int32_t MAX_LEAF_COUNT = 1024;
 const float MAXMBSortInHeap = 512.0 * 8;
@@ -193,8 +192,8 @@ public:
         }
         _index_writer = 
std::make_unique<lucene::index::IndexWriter>(_dir.get(), _analyzer.get(),
                                                                      create, 
true);
-        _index_writer->setMaxBufferedDocs(MAX_BUFFER_DOCS);
         
_index_writer->setRAMBufferSizeMB(config::inverted_index_ram_buffer_size);
+        
_index_writer->setMaxBufferedDocs(config::inverted_index_max_buffered_docs);
         _index_writer->setMaxFieldLength(MAX_FIELD_LEN);
         _index_writer->setMergeFactor(MERGE_FACTOR);
         _index_writer->setUseCompoundFile(false);
diff --git a/build.sh b/build.sh
index a7e31fa9b3b..15f22b03a72 100755
--- a/build.sh
+++ b/build.sh
@@ -302,7 +302,7 @@ update_submodule() {
 }
 
 update_submodule "be/src/apache-orc" "apache-orc" 
"https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz";
-update_submodule "be/src/clucene" "clucene" 
"https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene.tar.gz";
+update_submodule "be/src/clucene" "clucene" 
"https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene-2.0.tar.gz";
 
 if [[ "${CLEAN}" -eq 1 && "${BUILD_BE}" -eq 0 && "${BUILD_FE}" -eq 0 && 
"${BUILD_SPARK_DPP}" -eq 0 ]]; then
     clean_gensrc
diff --git 
a/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out
 
b/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out
new file mode 100644
index 00000000000..57ad3c1080c
--- /dev/null
+++ 
b/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out
@@ -0,0 +1,214 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+
diff --git 
a/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy
 
b/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy
new file mode 100644
index 00000000000..03ab16af5b5
--- /dev/null
+++ 
b/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy
@@ -0,0 +1,286 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_index_compaction_with_multi_index_segments", "p0") {
+    def tableName = "test_index_compaction_with_multi_index_segments"
+  
+    def set_be_config = { key, value ->
+        def backendId_to_backendIP = [:]
+        def backendId_to_backendHttpPort = [:]
+        getBackendIpHttpPort(backendId_to_backendIP, 
backendId_to_backendHttpPort);
+
+        for (String backend_id: backendId_to_backendIP.keySet()) {
+            def (code, out, err) = 
update_be_config(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id), key, value)
+            logger.info("update config: code=" + code + ", out=" + out + ", 
err=" + err)
+        }
+    }
+
+    boolean disableAutoCompaction = true
+    boolean invertedIndexCompactionEnable = false
+    int invertedIndexMaxBufferedDocs = -1;
+    boolean has_update_be_config = false
+
+    try {
+        String backend_id;
+        def backendId_to_backendIP = [:]
+        def backendId_to_backendHttpPort = [:]
+        getBackendIpHttpPort(backendId_to_backendIP, 
backendId_to_backendHttpPort);
+
+        backend_id = backendId_to_backendIP.keySet()[0]
+        def (code, out, err) = 
show_be_config(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id))
+        
+        logger.info("Show config: code=" + code + ", out=" + out + ", err=" + 
err)
+        assertEquals(code, 0)
+        def configList = parseJson(out.trim())
+        assert configList instanceof List
+
+        for (Object ele in (List) configList) {
+            assert ele instanceof List<String>
+            if (((List<String>) ele)[0] == "inverted_index_compaction_enable") 
{
+                invertedIndexCompactionEnable = 
Boolean.parseBoolean(((List<String>) ele)[2])
+                logger.info("inverted_index_compaction_enable: 
${((List<String>) ele)[2]}")
+            }
+            if (((List<String>) ele)[0] == "inverted_index_max_buffered_docs") 
{
+                invertedIndexMaxBufferedDocs = 
Integer.parseInt(((List<String>) ele)[2])
+                logger.info("inverted_index_max_buffered_docs: 
${((List<String>) ele)[2]}")
+            }
+        }
+        set_be_config.call("inverted_index_compaction_enable", "true")
+        set_be_config.call("inverted_index_max_buffered_docs", "5")
+        has_update_be_config = true
+
+        sql """ DROP TABLE IF EXISTS ${tableName}; """
+        sql """
+            CREATE TABLE ${tableName} (
+                `file_time` DATETIME NOT NULL,
+                `comment_id` int(11)  NULL,
+                `body` TEXT NULL DEFAULT "",
+                INDEX idx_comment_id (`comment_id`) USING INVERTED COMMENT 
'''',
+                INDEX idx_body (`body`) USING INVERTED PROPERTIES("parser" = 
"unicode") COMMENT ''''
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`file_time`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY RANDOM BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "disable_auto_compaction" = "true"
+            );
+        """
+
+        // insert 10 rows
+        sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 2, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 3, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 4, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 5, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 6, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 7, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 8, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 9, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 10, 
"I\'m using the builds"); """
+        // insert another 10 rows
+        sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 2, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 3, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 4, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 5, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 6, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 7, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 8, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 9, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 10, 
"I\'m using the builds"); """
+
+        qt_sql """ select * from ${tableName} order by file_time, comment_id, 
body """
+        qt_sql """ select * from ${tableName} where body match "using" order 
by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where body match "the" order by 
file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where comment_id < 8 order by 
file_time, comment_id, body """
+
+        
//TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus
+        String[][] tablets = sql """ show tablets from ${tableName}; """
+
+        def replicaNum = 1
+        logger.info("get table replica num: " + replicaNum)
+        // before full compaction, there are 3 rowsets.
+        int rowsetCount = 0
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            def compactionStatusUrlIndex = 18
+            (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+            logger.info("Show tablets status: code=" + code + ", out=" + out + 
", err=" + err)
+            assertEquals(code, 0)
+            def tabletJson = parseJson(out.trim())
+            assert tabletJson.rowsets instanceof List
+            rowsetCount +=((List<String>) tabletJson.rowsets).size()
+        }
+        assert (rowsetCount == 3 * replicaNum)
+
+        // trigger full compactions for all tablets in ${tableName}
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            backend_id = tablet[2]
+            times = 1
+
+            do{
+                (code, out, err) = 
be_run_full_compaction(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                logger.info("Run compaction: code=" + code + ", out=" + out + 
", err=" + err)
+                ++times
+                sleep(2000)
+            } while (parseJson(out.trim()).status.toLowerCase()!="success" && 
times<=10)
+
+            def compactJson = parseJson(out.trim())
+            if (compactJson.status.toLowerCase() == "fail") {
+                assertEquals(disableAutoCompaction, false)
+                logger.info("Compaction was done automatically!")
+            }
+            if (disableAutoCompaction) {
+                assertEquals("success", compactJson.status.toLowerCase())
+            }
+        }
+
+        // wait for full compaction done
+        for (String[] tablet in tablets) {
+            boolean running = true
+            do {
+                Thread.sleep(1000)
+                String tablet_id = tablet[0]
+                backend_id = tablet[2]
+                (code, out, err) = 
be_get_compaction_status(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                logger.info("Get compaction status: code=" + code + ", out=" + 
out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactionStatus = parseJson(out.trim())
+                assertEquals("success", compactionStatus.status.toLowerCase())
+                running = compactionStatus.run_status
+            } while (running)
+        }
+
+        // after full compaction, there is only 1 rowset.
+        
+        rowsetCount = 0
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            def compactionStatusUrlIndex = 18
+            (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+            logger.info("Show tablets status: code=" + code + ", out=" + out + 
", err=" + err)
+            assertEquals(code, 0)
+            def tabletJson = parseJson(out.trim())
+            assert tabletJson.rowsets instanceof List
+            rowsetCount +=((List<String>) tabletJson.rowsets).size()
+        }
+        assert (rowsetCount == 1 * replicaNum)
+
+        qt_sql """ select * from ${tableName} order by file_time, comment_id, 
body """
+        qt_sql """ select * from ${tableName} where body match "using" order 
by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where body match "the" order by 
file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where comment_id < 8 order by 
file_time, comment_id, body """
+
+        // insert 10 rows, again
+        sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 2, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 3, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 4, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 5, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 6, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 7, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 8, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 9, 
"I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 10, 
"I\'m using the builds"); """
+
+        tablets = sql """ show tablets from ${tableName}; """
+
+        logger.info("get table replica num: " + replicaNum)
+        // before full compaction, there are 2 rowsets.
+        rowsetCount = 0
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            def compactionStatusUrlIndex = 18
+            (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+            logger.info("Show tablets status: code=" + code + ", out=" + out + 
", err=" + err)
+            assertEquals(code, 0)
+            def tabletJson = parseJson(out.trim())
+            assert tabletJson.rowsets instanceof List
+            rowsetCount +=((List<String>) tabletJson.rowsets).size()
+        }
+        assert (rowsetCount == 2 * replicaNum)
+
+        // trigger full compactions for all tablets in ${tableName}
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            backend_id = tablet[2]
+            times = 1
+
+            do{
+                (code, out, err) = 
be_run_full_compaction(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                logger.info("Run compaction: code=" + code + ", out=" + out + 
", err=" + err)
+                ++times
+                sleep(2000)
+            } while (parseJson(out.trim()).status.toLowerCase()!="success" && 
times<=10)
+
+            def compactJson = parseJson(out.trim())
+            if (compactJson.status.toLowerCase() == "fail") {
+                assertEquals(disableAutoCompaction, false)
+                logger.info("Compaction was done automatically!")
+            }
+            if (disableAutoCompaction) {
+                assertEquals("success", compactJson.status.toLowerCase())
+            }
+        }
+
+        // wait for full compaction done
+        for (String[] tablet in tablets) {
+            boolean running = true
+            do {
+                Thread.sleep(1000)
+                String tablet_id = tablet[0]
+                backend_id = tablet[2]
+                (code, out, err) = 
be_get_compaction_status(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                logger.info("Get compaction status: code=" + code + ", out=" + 
out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactionStatus = parseJson(out.trim())
+                assertEquals("success", compactionStatus.status.toLowerCase())
+                running = compactionStatus.run_status
+            } while (running)
+        }
+
+        // after full compaction, there is only 1 rowset.
+        
+        rowsetCount = 0
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            def compactionStatusUrlIndex = 18
+            (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+            logger.info("Show tablets status: code=" + code + ", out=" + out + 
", err=" + err)
+            assertEquals(code, 0)
+            def tabletJson = parseJson(out.trim())
+            assert tabletJson.rowsets instanceof List
+            rowsetCount +=((List<String>) tabletJson.rowsets).size()
+        }
+        assert (rowsetCount == 1 * replicaNum)
+
+        qt_sql """ select * from ${tableName} order by file_time, comment_id, 
body """
+        qt_sql """ select * from ${tableName} where body match "using" order 
by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where body match "the" order by 
file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where comment_id < 8 order by 
file_time, comment_id, body """
+
+    } finally {
+        if (has_update_be_config) {
+            set_be_config.call("inverted_index_compaction_enable", 
invertedIndexCompactionEnable.toString())
+            set_be_config.call("inverted_index_max_buffered_docs", 
invertedIndexMaxBufferedDocs.toString())
+        }
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org

Reply via email to