This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6cc2d4a3e5c [Improve](TabletSchema) optimize finding TabletIndex
performance in TabletSchema (#48699)
6cc2d4a3e5c is described below
commit 6cc2d4a3e5c08520584ddf11e10defede4b82736
Author: lihangyu <[email protected]>
AuthorDate: Thu Mar 27 21:32:29 2025 +0800
[Improve](TabletSchema) optimize finding TabletIndex performance in
TabletSchema (#48699)
Add an `unordered_map` to accelerate TabletIndex search speed

---
be/src/olap/tablet_schema.cpp | 91 +++++++++-------
be/src/olap/tablet_schema.h | 17 +++
be/test/olap/tablet_schema_index_test.cpp | 174 ++++++++++++++++++++++++++++++
3 files changed, 244 insertions(+), 38 deletions(-)
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index a2c4f29ce0d..2ffc4a2d1a3 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -905,6 +905,10 @@ void TabletColumn::append_sparse_column(TabletColumn
column) {
}
void TabletSchema::append_index(TabletIndex&& index) {
+ for (int32_t id : index.col_unique_ids()) {
+ _col_id_suffix_to_index.emplace(
+ std::make_tuple(index.index_type(), id,
index.get_index_suffix()), _indexes.size());
+ }
_indexes.push_back(std::make_shared<TabletIndex>(index));
}
@@ -912,15 +916,14 @@ void TabletSchema::update_index(const TabletColumn& col,
const IndexType& index_
TabletIndex&& index) {
int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id()
: col.unique_id();
const std::string& suffix_path = escape_for_path_name(col.suffix_path());
- for (auto& _indexe : _indexes) {
- for (int32_t id : _indexe->col_unique_ids()) {
- if (_indexe->index_type() == index_type && id == col_unique_id &&
- _indexe->get_index_suffix() == suffix_path) {
- _indexe = std::make_shared<TabletIndex>(std::move(index));
- break;
- }
- }
+ IndexKey key(index_type, col_unique_id, suffix_path);
+ auto iter = _col_id_suffix_to_index.find(key);
+ if (iter != _col_id_suffix_to_index.end()) {
+ _indexes[iter->second] =
std::make_shared<TabletIndex>(std::move(index));
+ return;
}
+ LOG(WARNING) << " failed to update_index: " << index_type << " " <<
col_unique_id << " "
+ << suffix_path;
}
void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
@@ -938,17 +941,25 @@ void TabletSchema::clear_index_cache_handlers() {
void TabletSchema::clear_index() {
clear_index_cache_handlers();
_indexes.clear();
+ _col_id_suffix_to_index.clear();
}
void TabletSchema::remove_index(int64_t index_id) {
std::vector<TabletIndexPtr> indexes;
+ std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index;
for (auto index : _indexes) {
if (index->index_id() == index_id) {
continue;
}
+ for (int32_t col_uid : index->col_unique_ids()) {
+ col_id_suffix_to_index.emplace(
+ std::make_tuple(index->index_type(), col_uid,
index->get_index_suffix()),
+ indexes.size());
+ }
indexes.emplace_back(std::move(index));
}
_indexes = std::move(indexes);
+ _col_id_suffix_to_index = std::move(col_id_suffix_to_index);
}
void TabletSchema::clear_columns() {
@@ -979,6 +990,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB&
schema, bool ignore_extrac
_num_null_columns = 0;
_cols.clear();
_indexes.clear();
+ _col_id_suffix_to_index.clear();
_field_name_to_index.clear();
_field_id_to_index.clear();
_cluster_key_uids.clear();
@@ -1032,6 +1044,11 @@ void TabletSchema::init_from_pb(const TabletSchemaPB&
schema, bool ignore_extrac
index = std::make_shared<TabletIndex>();
index->init_from_pb(index_pb);
}
+ for (int32_t col_uid : index->col_unique_ids()) {
+ _col_id_suffix_to_index.emplace(
+ std::make_tuple(index->index_type(), col_uid,
index->get_index_suffix()),
+ _indexes.size());
+ }
_indexes.emplace_back(std::move(index));
}
_num_short_key_columns = schema.num_short_key_columns();
@@ -1150,6 +1167,7 @@ void TabletSchema::build_current_tablet_schema(int64_t
index_id, int32_t version
bool has_bf_columns = false;
_cols.clear();
_indexes.clear();
+ _col_id_suffix_to_index.clear();
_field_name_to_index.clear();
_field_id_to_index.clear();
_delete_sign_idx = -1;
@@ -1189,7 +1207,12 @@ void TabletSchema::build_current_tablet_schema(int64_t
index_id, int32_t version
_num_columns++;
}
- for (auto& i : index->indexes) {
+ for (const auto& i : index->indexes) {
+ for (int32_t col_uid : i->col_unique_ids()) {
+ _col_id_suffix_to_index.emplace(
+ std::make_tuple(i->index_type(), col_uid,
i->get_index_suffix()),
+ _indexes.size());
+ }
_indexes.emplace_back(std::make_shared<TabletIndex>(*i));
}
@@ -1371,6 +1394,15 @@ void TabletSchema::update_indexes_from_thrift(const
std::vector<doris::TOlapTabl
indexes.emplace_back(std::make_shared<TabletIndex>(std::move(index)));
}
_indexes = std::move(indexes);
+ std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index;
+ for (size_t i = 0; i < _indexes.size(); i++) {
+ for (int32_t col_uid : _indexes[i]->col_unique_ids()) {
+
col_id_suffix_to_index.emplace(std::make_tuple(_indexes[i]->index_type(),
col_uid,
+
_indexes[i]->get_index_suffix()),
+ i);
+ }
+ }
+ _col_id_suffix_to_index = std::move(col_id_suffix_to_index);
}
bool TabletSchema::exist_column(const std::string& field_name) const {
@@ -1426,14 +1458,10 @@ bool
TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id,
const std::string&
suffix_path) const {
const std::string escaped_suffix = escape_for_path_name(suffix_path);
- for (const auto& _index : _indexes) {
- if (_index->index_type() == IndexType::INVERTED) {
- for (int32_t id : _index->col_unique_ids()) {
- if (id == col_unique_id && _index->get_index_suffix() ==
escaped_suffix) {
- return _index.get();
- }
- }
- }
+ auto it = _col_id_suffix_to_index.find(
+ std::make_tuple(IndexType::INVERTED, col_unique_id,
escaped_suffix));
+ if (it != _col_id_suffix_to_index.end()) {
+ return _indexes[it->second].get();
}
return nullptr;
}
@@ -1450,30 +1478,17 @@ const TabletIndex* TabletSchema::inverted_index(const
TabletColumn& col) const {
}
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
- // TODO use more efficient impl
- for (const auto& _index : _indexes) {
- if (_index->index_type() == IndexType::NGRAM_BF) {
- for (int32_t id : _index->col_unique_ids()) {
- if (id == col_unique_id) {
- return true;
- }
- }
- }
- }
-
- return false;
+ IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
+ auto it = _col_id_suffix_to_index.find(index_key);
+ return it != _col_id_suffix_to_index.end();
}
const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id)
const {
- // TODO use more efficient impl
- for (const auto& _index : _indexes) {
- if (_index->index_type() == IndexType::NGRAM_BF) {
- for (int32_t id : _index->col_unique_ids()) {
- if (id == col_unique_id) {
- return _index.get();
- }
- }
- }
+ // Get the ngram bf index for the given column unique id
+ IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
+ auto it = _col_id_suffix_to_index.find(index_key);
+ if (it != _col_id_suffix_to_index.end()) {
+ return _indexes[it->second].get();
}
return nullptr;
}
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index cdc91981d24..8d0f3f111de 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -560,6 +560,23 @@ private:
std::unordered_map<int32_t, int32_t> _field_id_to_index;
std::unordered_map<vectorized::PathInDataRef, int32_t,
vectorized::PathInDataRef::Hash>
_field_path_to_index;
+
+ // index_type/col_unique_id/suffix -> idx in _indexes
+ using IndexKey = std::tuple<IndexType, int32_t, std::string>;
+ struct IndexKeyHash {
+ size_t operator()(const IndexKey& t) const {
+ std::size_t seed = 0;
+ seed = doris::HashUtil::hash((const char*)&std::get<0>(t),
sizeof(std::get<0>(t)),
+ seed);
+ seed = doris::HashUtil::hash((const char*)&std::get<1>(t),
sizeof(std::get<1>(t)),
+ seed);
+ seed = doris::HashUtil::hash((const char*)std::get<2>(t).c_str(),
std::get<2>(t).size(),
+ seed);
+ return seed;
+ }
+ };
+ std::unordered_map<IndexKey, int32_t, IndexKeyHash>
_col_id_suffix_to_index;
+
size_t _num_columns = 0;
size_t _num_variant_columns = 0;
size_t _num_key_columns = 0;
diff --git a/be/test/olap/tablet_schema_index_test.cpp
b/be/test/olap/tablet_schema_index_test.cpp
new file mode 100644
index 00000000000..b35bbdb2a47
--- /dev/null
+++ b/be/test/olap/tablet_schema_index_test.cpp
@@ -0,0 +1,174 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "olap/tablet_schema.h"
+
+namespace doris {
+
+class TabletSchemaIndexTest : public testing::Test {
+protected:
+ void SetUp() override {
+ // Setup common test data
+ _tablet_schema = std::make_shared<TabletSchema>();
+ }
+
+ TabletIndex create_test_index(int64_t index_id, IndexType type,
+ const std::vector<int32_t>& col_uids,
+ const std::string& suffix = "") {
+ TabletIndex index;
+ index._index_id = index_id;
+ index._index_type = type;
+ index._col_unique_ids = col_uids;
+ index.set_escaped_escaped_index_suffix_path(suffix);
+ return index;
+ }
+
+ std::shared_ptr<TabletSchema> _tablet_schema;
+};
+
+TEST_F(TabletSchemaIndexTest, TestAddInvertedIndex) {
+ // Add inverted index with suffix
+ TabletIndex index = create_test_index(1, IndexType::INVERTED, {100},
"suffix1");
+ _tablet_schema->append_index(std::move(index));
+
+ // Verify index mapping
+ auto* found_index = _tablet_schema->inverted_index(100, "suffix1");
+ ASSERT_NE(found_index, nullptr);
+ EXPECT_EQ(found_index->index_id(), 1);
+ EXPECT_EQ(found_index->get_index_suffix(), "suffix1");
+}
+
+TEST_F(TabletSchemaIndexTest, TestRemoveIndex) {
+ // Add multiple indexes
+ _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED,
{100}, "suffix1"));
+ _tablet_schema->append_index(create_test_index(2, IndexType::INVERTED,
{200}, "suffix2"));
+
+ // Remove index 1
+ _tablet_schema->remove_index(1);
+
+ // Verify index 1 removed
+ EXPECT_EQ(_tablet_schema->inverted_index(100, "suffix1"), nullptr);
+
+ // Verify index 2 still exists
+ auto* found_index = _tablet_schema->inverted_index(200, "suffix2");
+ ASSERT_NE(found_index, nullptr);
+ EXPECT_EQ(found_index->index_id(), 2);
+}
+
+TEST_F(TabletSchemaIndexTest, TestUpdateIndex) {
+ // Add initial index
+ _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED,
{100}, "old_suffix"));
+ ASSERT_NE(_tablet_schema->inverted_index(100, "old_suffix"), nullptr);
+
+ // Update index with new suffix
+ _tablet_schema->remove_index(1);
+ _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED,
{100}, "new_suffix"));
+
+ // Verify update
+ EXPECT_EQ(_tablet_schema->inverted_index(100, "old_suffix"), nullptr);
+ auto* found_index = _tablet_schema->inverted_index(100, "new_suffix");
+ ASSERT_NE(found_index, nullptr);
+ EXPECT_EQ(found_index->get_index_suffix(), "new%5Fsuffix");
+}
+
+TEST_F(TabletSchemaIndexTest, TestMultipleColumnsIndex) {
+ // Add index with multiple columns
+ TabletIndex index = create_test_index(1, IndexType::INVERTED, {100, 200},
"multi_col");
+ _tablet_schema->append_index(std::move(index));
+
+ // Verify both columns mapped
+ auto* index1 = _tablet_schema->inverted_index(100, "multi_col");
+ auto* index2 = _tablet_schema->inverted_index(200, "multi_col");
+ ASSERT_NE(index1, nullptr);
+ ASSERT_EQ(index1, index2); // Should point to same index
+}
+
+TEST_F(TabletSchemaIndexTest, TestDuplicateIndexKey) {
+ // Add two indexes with same (type,col,suffix)
+ _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED,
{100}, "suffix"));
+ _tablet_schema->append_index(create_test_index(2, IndexType::INVERTED,
{100}, "suffix"));
+
+ // The last added should override
+ auto* found_index = _tablet_schema->inverted_index(100, "suffix");
+ ASSERT_NE(found_index, nullptr);
+ EXPECT_EQ(found_index->index_id(), 1);
+}
+
+TEST_F(TabletSchemaIndexTest, TestClearIndexes) {
+ _tablet_schema->append_index(create_test_index(1, IndexType::INVERTED,
{100}));
+ _tablet_schema->clear_index();
+
+ EXPECT_EQ(_tablet_schema->inverted_index(100, ""), nullptr);
+ EXPECT_TRUE(_tablet_schema->inverted_indexes().empty());
+}
+
+TEST_F(TabletSchemaIndexTest, TestUpdateIndexMethod) {
+ TabletColumn col;
+ col.set_parent_unique_id(100);
+ col.set_path_info(vectorized::PathInData("v2"));
+ _tablet_schema->append_column(col);
+
+ TabletIndex old_index = create_test_index(1, IndexType::INVERTED, {100},
"v2");
+ _tablet_schema->append_index(std::move(old_index));
+
+ TabletIndex new_index = create_test_index(1, IndexType::INVERTED, {100},
"v2");
+ new_index._properties["new_prop"] = "value";
+
+ _tablet_schema->update_index(col, IndexType::INVERTED,
std::move(new_index));
+
+ const TabletIndex* updated_index = _tablet_schema->inverted_index(100,
"v2");
+ ASSERT_NE(updated_index, nullptr);
+ EXPECT_EQ(updated_index->index_id(), 1);
+ EXPECT_EQ(updated_index->properties().at("new_prop"), "value");
+
+ auto key = std::make_tuple(IndexType::INVERTED, 100, "v2");
+ EXPECT_NE(_tablet_schema->_col_id_suffix_to_index.find(key),
+ _tablet_schema->_col_id_suffix_to_index.end());
+}
+
+TEST_F(TabletSchemaIndexTest, TestUpdateIndexAddNewWhenNotExist) {
+ // Not exist, return nullptr
+ TabletColumn col;
+ col.set_unique_id(200);
+
+ TabletIndex new_index = create_test_index(2, IndexType::INVERTED, {200},
"v3");
+ _tablet_schema->update_index(col, IndexType::INVERTED,
std::move(new_index));
+
+ const TabletIndex* index = _tablet_schema->inverted_index(200, "v3");
+ ASSERT_EQ(index, nullptr);
+}
+
+TEST_F(TabletSchemaIndexTest, TestUpdateIndexWithMultipleColumns) {
+ TabletColumn col1, col2;
+ col1.set_unique_id(300);
+ col2.set_unique_id(400);
+ _tablet_schema->append_column(col1);
+ _tablet_schema->append_column(col2);
+
+ TabletIndex old_multi_index = create_test_index(3, IndexType::INVERTED,
{300, 400}, "multi");
+ _tablet_schema->append_index(std::move(old_multi_index));
+
+ TabletIndex new_multi_index = create_test_index(3, IndexType::NGRAM_BF,
{300, 400});
+ _tablet_schema->append_index(std::move(new_multi_index));
+
+ ASSERT_NE(_tablet_schema->inverted_index(300, "multi"), nullptr);
+ EXPECT_NE(_tablet_schema->get_ngram_bf_index(400), nullptr);
+}
+
+} // namespace doris
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]