This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 6495eb28d81 [fix](index compaction)support compact multi segments in one index #28889 (#29276)
6495eb28d81 is described below

commit 6495eb28d8118f8266a5212ca31e1af80e51e320
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Fri Dec 29 16:10:26 2023 +0800

    [fix](index compaction)support compact multi segments in one index #28889 (#29276)
---
 .gitmodules                                         |   2 +-
 be/src/clucene                                      |   2 +-
 be/src/common/config.cpp                            |   5 +-
 be/src/common/config.h                              |   3 +-
 be/src/olap/compaction.cpp                          |  38 ++-
 .../segment_v2/inverted_index_compaction.cpp        |   1 +
 .../rowset/segment_v2/inverted_index_writer.cpp     |   3 +-
 build.sh                                            |   2 +-
 ..._index_compaction_with_multi_index_segments.out  | 214 +++++++++++++++
 ...dex_compaction_with_multi_index_segments.groovy  | 286 +++++++++++++++++++++
 10 files changed, 544 insertions(+), 12 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 9fe51bfd1d0..729354ec9c7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -32,4 +32,4 @@
 [submodule "be/src/clucene"]
     path = be/src/clucene
     url = https://github.com/apache/doris-thirdparty.git
-    branch = clucene
+    branch = clucene-2.0
diff --git a/be/src/clucene b/be/src/clucene
index ed92e181310..c9030853082 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit ed92e1813103a513aa0ee16730b94cc840daec73
+Subproject commit c90308530828a24fe421a9e19bc1e5e06f1460cd
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index ee7df5d96db..af08d2deaab 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -996,6 +996,9 @@ DEFINE_String(inverted_index_query_cache_limit, "10%");
 // inverted index
 DEFINE_mDouble(inverted_index_ram_buffer_size, "512");
+// -1 indicates not working.
+// Normally we should not change this, it's useful for testing.
+DEFINE_mInt32(inverted_index_max_buffered_docs, "-1");
 DEFINE_Int32(query_bkd_inverted_index_limit_percent, "5"); // 5%
 // dict path for chinese analyzer
 DEFINE_String(inverted_index_dict_path, "${DORIS_HOME}/dict");
@@ -1003,7 +1006,7 @@ DEFINE_Int32(inverted_index_read_buffer_size, "4096");
 // tree depth for bkd index
 DEFINE_Int32(max_depth_in_bkd_tree, "32");
 // index compaction
-DEFINE_Bool(inverted_index_compaction_enable, "false");
+DEFINE_mBool(inverted_index_compaction_enable, "false");
 // use num_broadcast_buffer blocks as buffer to do broadcast
 DEFINE_Int32(num_broadcast_buffer, "32");
 // semi-structure configs
diff --git a/be/src/common/config.h b/be/src/common/config.h
index c865c3b72b2..4fc3bc8dbfa 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1037,10 +1037,11 @@ DECLARE_Int32(query_bkd_inverted_index_limit_percent); // 5%
 // dict path for chinese analyzer
 DECLARE_String(inverted_index_dict_path);
 DECLARE_Int32(inverted_index_read_buffer_size);
+DECLARE_mInt32(inverted_index_max_buffered_docs);
 // tree depth for bkd index
 DECLARE_Int32(max_depth_in_bkd_tree);
 // index compaction
-DECLARE_Bool(inverted_index_compaction_enable);
+DECLARE_mBool(inverted_index_compaction_enable);
 // use num_broadcast_buffer blocks as buffer to do broadcast
 DECLARE_Int32(num_broadcast_buffer);
 // semi-structure configs
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 761e50db73e..f6c8b3bb5bf 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -403,6 +403,34 @@ Status Compaction::do_compaction_impl(int64_t permits) {
     if (_input_row_num > 0 && stats.rowid_conversion &&
         config::inverted_index_compaction_enable) {
         OlapStopWatch inverted_watch;
+
+        // check rowid_conversion correctness
+        Version version = _tablet->max_version();
+        DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
+        std::set<RowLocation> missed_rows;
+        std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>> location_map;
+        // Convert the delete bitmap of the input rowsets to output rowset.
+        std::size_t missed_rows_size = 0;
+        _tablet->calc_compaction_output_rowset_delete_bitmap(
+                _input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
+                &location_map, _tablet->tablet_meta()->delete_bitmap(),
+                &output_rowset_delete_bitmap);
+        if (!allow_delete_in_cumu_compaction()) {
+            missed_rows_size = missed_rows.size();
+            if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION &&
+                stats.merged_rows != missed_rows_size) {
+                std::string err_msg = fmt::format(
+                        "cumulative compaction: the merged rows({}) is not equal to missed "
+                        "rows({}) in rowid conversion, tablet_id: {}, table_id:{}",
+                        stats.merged_rows, missed_rows_size, _tablet->tablet_id(),
+                        _tablet->table_id());
+                DCHECK(false) << err_msg;
+                LOG(WARNING) << err_msg;
+            }
+        }
+
+        RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map));
+
         // translation vec
         // <<dest_idx_num, dest_docId>>
         // the first level vector: index indicates src segment.
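The "translation vec" mentioned in the comment above maps every surviving row of every source segment to a <dest segment index, dest docId> pair: the outer vector is indexed by source segment and the inner vector by the row's docId within that segment. The following is only a minimal sketch of that shape, assuming destination segments are simply filled in order up to a fixed row budget; the build_trans_vec helper, its parameters, and the type aliases are hypothetical and are not the Doris implementation.

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Sketch only: one inner vector per source segment, one entry per row of
// that segment. (In the real compaction path, rows removed by the merge
// would not receive a destination; this sketch keeps every row.)
using DestPos = std::pair<uint32_t, uint32_t>;      // <dest_segment_idx, dest_docId>
using TransVec = std::vector<std::vector<DestPos>>; // [src_segment][src_docId]

// Hypothetical helper: assign rows of each source segment to destination
// segments holding at most `rows_per_dest_segment` rows each.
TransVec build_trans_vec(const std::vector<uint32_t>& src_segment_rows,
                         uint32_t rows_per_dest_segment) {
    TransVec trans_vec(src_segment_rows.size());
    uint32_t dest_seg = 0;
    uint32_t dest_doc = 0;
    for (std::size_t src = 0; src < src_segment_rows.size(); ++src) {
        trans_vec[src].reserve(src_segment_rows[src]);
        for (uint32_t row = 0; row < src_segment_rows[src]; ++row) {
            if (dest_doc == rows_per_dest_segment) {
                ++dest_seg;     // current destination segment is full, start the next one
                dest_doc = 0;
            }
            trans_vec[src].emplace_back(dest_seg, dest_doc++);
        }
    }
    return trans_vec;
}

With several index segments per rowset, the outer size of this structure equals the number of source index directories rather than the number of rowsets, which is the invariant the DCHECK_EQ added to inverted_index_compaction.cpp below asserts.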
@@ -428,7 +456,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
         // src index files
         // format: rowsetId_segmentId
         std::vector<std::string> src_index_files(src_segment_num);
-        for (auto m : src_seg_to_id_map) {
+        for (const auto& m : src_seg_to_id_map) {
             std::pair<RowsetId, uint32_t> p = m.first;
             src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second);
         }
@@ -677,11 +705,11 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
             // of incremental data later.
             // TODO(LiaoXin): check if there are duplicate keys
             std::size_t missed_rows_size = 0;
+            _tablet->calc_compaction_output_rowset_delete_bitmap(
+                    _input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
+                    &location_map, _tablet->tablet_meta()->delete_bitmap(),
+                    &output_rowset_delete_bitmap);
             if (!allow_delete_in_cumu_compaction()) {
-                _tablet->calc_compaction_output_rowset_delete_bitmap(
-                        _input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
-                        &location_map, _tablet->tablet_meta()->delete_bitmap(),
-                        &output_rowset_delete_bitmap);
                 missed_rows_size = missed_rows.size();
                 if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION &&
                     stats != nullptr && stats->merged_rows != missed_rows_size) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
index 7f653a93591..b3a28c6ebfc 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
@@ -56,6 +56,7 @@ Status compact_column(int32_t index_id, int src_segment_num, int dest_segment_nu
         dest_index_dirs[i] = DorisCompoundDirectory::getDirectory(fs, path.c_str(), true);
     }
+    DCHECK_EQ(src_index_dirs.size(), trans_vec.size());
     index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec,
                                   dest_segment_num_rows);
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 0949d708742..744710d9082 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -60,7 +60,6 @@ namespace doris::segment_v2 {
 const int32_t MAX_FIELD_LEN = 0x7FFFFFFFL;
-const int32_t MAX_BUFFER_DOCS = 100000000;
 const int32_t MERGE_FACTOR = 100000000;
 const int32_t MAX_LEAF_COUNT = 1024;
 const float MAXMBSortInHeap = 512.0 * 8;
@@ -193,8 +192,8 @@ public:
         }
         _index_writer = std::make_unique<lucene::index::IndexWriter>(_dir.get(), _analyzer.get(), create, true);
-        _index_writer->setMaxBufferedDocs(MAX_BUFFER_DOCS);
         _index_writer->setRAMBufferSizeMB(config::inverted_index_ram_buffer_size);
+        _index_writer->setMaxBufferedDocs(config::inverted_index_max_buffered_docs);
         _index_writer->setMaxFieldLength(MAX_FIELD_LEN);
         _index_writer->setMergeFactor(MERGE_FACTOR);
         _index_writer->setUseCompoundFile(false);
diff --git a/build.sh b/build.sh
index a7e31fa9b3b..15f22b03a72 100755
--- a/build.sh
+++ b/build.sh
@@ -302,7 +302,7 @@ update_submodule() {
 }

 update_submodule "be/src/apache-orc" "apache-orc" "https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz"
-update_submodule "be/src/clucene" "clucene" "https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene.tar.gz"
+update_submodule "be/src/clucene" "clucene" "https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene-2.0.tar.gz"

 if [[ "${CLEAN}" -eq 1 && "${BUILD_BE}" -eq 0 && "${BUILD_FE}" -eq 0 && "${BUILD_SPARK_DPP}" -eq 0 ]]; then
clean_gensrc diff --git a/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out b/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out new file mode 100644 index 00000000000..57ad3c1080c --- /dev/null +++ b/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out @@ -0,0 +1,214 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 10 I'm using the builds +2018-02-21T12:00 10 I'm using the builds + +-- !sql -- +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 10 I'm using the builds +2018-02-21T12:00 10 I'm using the builds + +-- !sql -- + +-- !sql -- +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds + +-- !sql -- +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 9 I'm using the 
builds +2018-02-21T12:00 10 I'm using the builds +2018-02-21T12:00 10 I'm using the builds + +-- !sql -- +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 10 I'm using the builds +2018-02-21T12:00 10 I'm using the builds + +-- !sql -- + +-- !sql -- +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds + +-- !sql -- +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 10 I'm using the builds +2018-02-21T12:00 10 I'm using the builds +2018-02-21T12:00 10 I'm using the builds + +-- !sql -- +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 7 I'm 
using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 8 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 9 I'm using the builds +2018-02-21T12:00 10 I'm using the builds +2018-02-21T12:00 10 I'm using the builds +2018-02-21T12:00 10 I'm using the builds + +-- !sql -- + +-- !sql -- +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 1 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 2 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 3 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 4 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 5 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 6 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds +2018-02-21T12:00 7 I'm using the builds + diff --git a/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy b/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy new file mode 100644 index 00000000000..03ab16af5b5 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy @@ -0,0 +1,286 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_index_compaction_with_multi_index_segments", "p0") { + def tableName = "test_index_compaction_with_multi_index_segments" + + def set_be_config = { key, value -> + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + for (String backend_id: backendId_to_backendIP.keySet()) { + def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) + logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) + } + } + + boolean disableAutoCompaction = true + boolean invertedIndexCompactionEnable = false + int invertedIndexMaxBufferedDocs = -1; + boolean has_update_be_config = false + + try { + String backend_id; + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + backend_id = backendId_to_backendIP.keySet()[0] + def (code, out, err) = show_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id)) + + logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def configList = parseJson(out.trim()) + assert configList instanceof List + + for (Object ele in (List) configList) { + assert ele instanceof List<String> + if (((List<String>) ele)[0] == "inverted_index_compaction_enable") { + invertedIndexCompactionEnable = Boolean.parseBoolean(((List<String>) ele)[2]) + logger.info("inverted_index_compaction_enable: ${((List<String>) ele)[2]}") + } + if (((List<String>) ele)[0] == "inverted_index_max_buffered_docs") { + invertedIndexMaxBufferedDocs = Integer.parseInt(((List<String>) ele)[2]) + logger.info("inverted_index_max_buffered_docs: ${((List<String>) ele)[2]}") + } + } + set_be_config.call("inverted_index_compaction_enable", "true") + set_be_config.call("inverted_index_max_buffered_docs", "5") + has_update_be_config = true + + sql """ DROP TABLE IF EXISTS ${tableName}; """ + sql """ + CREATE TABLE ${tableName} ( + `file_time` DATETIME NOT NULL, + `comment_id` int(11) NULL, + `body` TEXT NULL DEFAULT "", + INDEX idx_comment_id (`comment_id`) USING INVERTED COMMENT '''', + INDEX idx_body (`body`) USING INVERTED PROPERTIES("parser" = "unicode") COMMENT '''' + ) ENGINE=OLAP + DUPLICATE KEY(`file_time`) + COMMENT 'OLAP' + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + // insert 10 rows + sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1, "I\'m using the builds"), + ("2018-02-21 12:00:00", 2, "I\'m using the builds"), + ("2018-02-21 12:00:00", 3, "I\'m using the builds"), + ("2018-02-21 12:00:00", 4, "I\'m using the builds"), + ("2018-02-21 12:00:00", 5, "I\'m using the builds"), + ("2018-02-21 12:00:00", 6, "I\'m using the builds"), + ("2018-02-21 12:00:00", 7, "I\'m using the builds"), + ("2018-02-21 12:00:00", 8, "I\'m using the builds"), + ("2018-02-21 12:00:00", 9, "I\'m using the builds"), + ("2018-02-21 12:00:00", 10, "I\'m using the builds"); """ + // insert another 10 rows + sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1, "I\'m using the builds"), + ("2018-02-21 12:00:00", 2, "I\'m using the builds"), + ("2018-02-21 12:00:00", 3, "I\'m using the builds"), + ("2018-02-21 12:00:00", 4, "I\'m using the builds"), + 
("2018-02-21 12:00:00", 5, "I\'m using the builds"), + ("2018-02-21 12:00:00", 6, "I\'m using the builds"), + ("2018-02-21 12:00:00", 7, "I\'m using the builds"), + ("2018-02-21 12:00:00", 8, "I\'m using the builds"), + ("2018-02-21 12:00:00", 9, "I\'m using the builds"), + ("2018-02-21 12:00:00", 10, "I\'m using the builds"); """ + + qt_sql """ select * from ${tableName} order by file_time, comment_id, body """ + qt_sql """ select * from ${tableName} where body match "using" order by file_time, comment_id, body """ + qt_sql """ select * from ${tableName} where body match "the" order by file_time, comment_id, body """ + qt_sql """ select * from ${tableName} where comment_id < 8 order by file_time, comment_id, body """ + + //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus + String[][] tablets = sql """ show tablets from ${tableName}; """ + + def replicaNum = 1 + logger.info("get table replica num: " + replicaNum) + // before full compaction, there are 3 rowsets. + int rowsetCount = 0 + for (String[] tablet in tablets) { + String tablet_id = tablet[0] + def compactionStatusUrlIndex = 18 + (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex]) + logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def tabletJson = parseJson(out.trim()) + assert tabletJson.rowsets instanceof List + rowsetCount +=((List<String>) tabletJson.rowsets).size() + } + assert (rowsetCount == 3 * replicaNum) + + // trigger full compactions for all tablets in ${tableName} + for (String[] tablet in tablets) { + String tablet_id = tablet[0] + backend_id = tablet[2] + times = 1 + + do{ + (code, out, err) = be_run_full_compaction(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + ++times + sleep(2000) + } while (parseJson(out.trim()).status.toLowerCase()!="success" && times<=10) + + def compactJson = parseJson(out.trim()) + if (compactJson.status.toLowerCase() == "fail") { + assertEquals(disableAutoCompaction, false) + logger.info("Compaction was done automatically!") + } + if (disableAutoCompaction) { + assertEquals("success", compactJson.status.toLowerCase()) + } + } + + // wait for full compaction done + for (String[] tablet in tablets) { + boolean running = true + do { + Thread.sleep(1000) + String tablet_id = tablet[0] + backend_id = tablet[2] + (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + + // after full compaction, there is only 1 rowset. 
+ + rowsetCount = 0 + for (String[] tablet in tablets) { + String tablet_id = tablet[0] + def compactionStatusUrlIndex = 18 + (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex]) + logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def tabletJson = parseJson(out.trim()) + assert tabletJson.rowsets instanceof List + rowsetCount +=((List<String>) tabletJson.rowsets).size() + } + assert (rowsetCount == 1 * replicaNum) + + qt_sql """ select * from ${tableName} order by file_time, comment_id, body """ + qt_sql """ select * from ${tableName} where body match "using" order by file_time, comment_id, body """ + qt_sql """ select * from ${tableName} where body match "the" order by file_time, comment_id, body """ + qt_sql """ select * from ${tableName} where comment_id < 8 order by file_time, comment_id, body """ + + // insert 10 rows, again + sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1, "I\'m using the builds"), + ("2018-02-21 12:00:00", 2, "I\'m using the builds"), + ("2018-02-21 12:00:00", 3, "I\'m using the builds"), + ("2018-02-21 12:00:00", 4, "I\'m using the builds"), + ("2018-02-21 12:00:00", 5, "I\'m using the builds"), + ("2018-02-21 12:00:00", 6, "I\'m using the builds"), + ("2018-02-21 12:00:00", 7, "I\'m using the builds"), + ("2018-02-21 12:00:00", 8, "I\'m using the builds"), + ("2018-02-21 12:00:00", 9, "I\'m using the builds"), + ("2018-02-21 12:00:00", 10, "I\'m using the builds"); """ + + tablets = sql """ show tablets from ${tableName}; """ + + logger.info("get table replica num: " + replicaNum) + // before full compaction, there are 2 rowsets. + rowsetCount = 0 + for (String[] tablet in tablets) { + String tablet_id = tablet[0] + def compactionStatusUrlIndex = 18 + (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex]) + logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def tabletJson = parseJson(out.trim()) + assert tabletJson.rowsets instanceof List + rowsetCount +=((List<String>) tabletJson.rowsets).size() + } + assert (rowsetCount == 2 * replicaNum) + + // trigger full compactions for all tablets in ${tableName} + for (String[] tablet in tablets) { + String tablet_id = tablet[0] + backend_id = tablet[2] + times = 1 + + do{ + (code, out, err) = be_run_full_compaction(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + ++times + sleep(2000) + } while (parseJson(out.trim()).status.toLowerCase()!="success" && times<=10) + + def compactJson = parseJson(out.trim()) + if (compactJson.status.toLowerCase() == "fail") { + assertEquals(disableAutoCompaction, false) + logger.info("Compaction was done automatically!") + } + if (disableAutoCompaction) { + assertEquals("success", compactJson.status.toLowerCase()) + } + } + + // wait for full compaction done + for (String[] tablet in tablets) { + boolean running = true + do { + Thread.sleep(1000) + String tablet_id = tablet[0] + backend_id = tablet[2] + (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while 
(running) + } + + // after full compaction, there is only 1 rowset. + + rowsetCount = 0 + for (String[] tablet in tablets) { + String tablet_id = tablet[0] + def compactionStatusUrlIndex = 18 + (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex]) + logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def tabletJson = parseJson(out.trim()) + assert tabletJson.rowsets instanceof List + rowsetCount +=((List<String>) tabletJson.rowsets).size() + } + assert (rowsetCount == 1 * replicaNum) + + qt_sql """ select * from ${tableName} order by file_time, comment_id, body """ + qt_sql """ select * from ${tableName} where body match "using" order by file_time, comment_id, body """ + qt_sql """ select * from ${tableName} where body match "the" order by file_time, comment_id, body """ + qt_sql """ select * from ${tableName} where comment_id < 8 order by file_time, comment_id, body """ + + } finally { + if (has_update_be_config) { + set_be_config.call("inverted_index_compaction_enable", invertedIndexCompactionEnable.toString()) + set_be_config.call("inverted_index_max_buffered_docs", invertedIndexMaxBufferedDocs.toString()) + } + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
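As background on the numbers the regression test above relies on: it sets inverted_index_max_buffered_docs to 5 while each INSERT loads 10 rows, so the index writer should flush more than one index segment per rowset, and the subsequent full compaction then has to merge several source index segments for each index, which is the multi-segment case this fix covers. The sketch below only illustrates that arithmetic under the assumption that an index segment is flushed each time the buffered-doc limit is reached; the function name and the printed message are illustrative, not part of the patch or the Doris API.

#include <cstdint>
#include <iostream>

// Assumed behavior for illustration: one index segment is flushed every
// `max_buffered_docs` buffered rows, so a single load of `rows_per_load`
// rows yields ceil(rows_per_load / max_buffered_docs) index segments.
uint32_t index_segments_per_load(uint32_t rows_per_load, uint32_t max_buffered_docs) {
    return (rows_per_load + max_buffered_docs - 1) / max_buffered_docs;
}

int main() {
    const uint32_t rows_per_load = 10;     // each INSERT in the test adds 10 rows
    const uint32_t max_buffered_docs = 5;  // BE config value set by the test
    const uint32_t loads = 2;              // two inserts before the first full compaction
    // 2 index segments per rowset * 2 rowsets = 4 source index segments to merge.
    std::cout << index_segments_per_load(rows_per_load, max_buffered_docs) * loads
              << " source index segments feed one full compaction\n";
    return 0;
}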