This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 9c672fb1856 [Refactor](inverted index) refactor inverted index interface in tablet schema (#43003) (#43280) (#43669)
9c672fb1856 is described below

commit 9c672fb1856369a5d0d742e146ccbf6923a43728
Author: Sun Chenyang <csun5...@gmail.com>
AuthorDate: Tue Nov 12 17:54:44 2024 +0800

    [Refactor](inverted index) refactor inverted index interface in tablet schema (#43003) (#43280) (#43669)
    
    pick from master #43003,#43280
---
 be/src/cloud/cloud_meta_mgr.cpp                    |   8 +-
 be/src/cloud/cloud_tablet.cpp                      |  14 ++-
 be/src/cloud/cloud_warm_up_manager.cpp             |  14 ++-
 be/src/olap/compaction.cpp                         |  19 ++--
 be/src/olap/delta_writer.cpp                       |   2 +-
 be/src/olap/rowset/beta_rowset.cpp                 |  41 +++-----
 be/src/olap/rowset/beta_rowset_writer.cpp          |   4 +-
 be/src/olap/rowset/segcompaction.cpp               |   3 +-
 be/src/olap/rowset/segment_v2/column_writer.h      |   2 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  17 ++--
 be/src/olap/rowset/segment_v2/segment_writer.cpp   |  21 ++--
 .../rowset/segment_v2/vertical_segment_writer.cpp  |  19 ++--
 be/src/olap/schema_change.cpp                      |   9 +-
 be/src/olap/snapshot_manager.cpp                   |   7 +-
 be/src/olap/tablet.cpp                             |  17 +---
 be/src/olap/tablet_schema.cpp                      |  77 ++-------------
 be/src/olap/tablet_schema.h                        |  40 +++++---
 be/src/olap/task/engine_storage_migration_task.cpp |   7 +-
 be/src/olap/task/index_builder.cpp                 |  20 ++--
 be/src/service/backend_service.cpp                 |   9 +-
 be/src/vec/common/schema_util.cpp                  |  25 ++++-
 be/src/vec/common/schema_util.h                    |   3 +
 .../compaction/index_compaction_test.cpp           |   8 +-
 .../index_compaction_with_deleted_term.cpp         |   8 +-
 be/test/olap/tablet_index_test.cpp                 | 108 +++++++++++++++++++++
 25 files changed, 266 insertions(+), 236 deletions(-)

diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index fefdb65e44b..0beeea42bd6 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -1225,12 +1225,8 @@ int64_t CloudMetaMgr::get_inverted_index_file_szie(const 
RowsetMeta& rs_meta) {
     }
     if (rs_meta.tablet_schema()->get_inverted_index_storage_format() ==
         InvertedIndexStorageFormatPB::V1) {
-        auto indices = rs_meta.tablet_schema()->indexes();
+        const auto& indices = rs_meta.tablet_schema()->inverted_indexes();
         for (auto& index : indices) {
-            // only get file_size for inverted index
-            if (index.index_type() != IndexType::INVERTED) {
-                continue;
-            }
             for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) {
                 std::string segment_path = 
StorageResource().remote_segment_path(
                         rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), 
seg_id);
@@ -1239,7 +1235,7 @@ int64_t CloudMetaMgr::get_inverted_index_file_szie(const 
RowsetMeta& rs_meta) {
                 std::string inverted_index_file_path =
                         InvertedIndexDescriptor::get_index_file_path_v1(
                                 
InvertedIndexDescriptor::get_index_file_path_prefix(segment_path),
-                                index.index_id(), index.get_index_suffix());
+                                index->index_id(), index->get_index_suffix());
                 auto st = fs->file_size(inverted_index_file_path, &file_size);
                 if (!st.ok()) {
                     file_size = 0;
diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index a8b5fcde662..c7a85fc820d 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -285,15 +285,13 @@ void 
CloudTablet::add_rowsets(std::vector<RowsetSharedPtr> to_add, bool version_
                     auto schema_ptr = rowset_meta->tablet_schema();
                     auto idx_version = 
schema_ptr->get_inverted_index_storage_format();
                     if (idx_version == InvertedIndexStorageFormatPB::V1) {
-                        for (const auto& index : schema_ptr->indexes()) {
-                            if (index.index_type() == IndexType::INVERTED) {
-                                auto idx_path = 
storage_resource.value()->remote_idx_v1_path(
-                                        *rowset_meta, seg_id, index.index_id(),
-                                        index.get_index_suffix());
-                                download_idx_file(idx_path);
-                            }
+                        for (const auto& index : 
schema_ptr->inverted_indexes()) {
+                            auto idx_path = 
storage_resource.value()->remote_idx_v1_path(
+                                    *rowset_meta, seg_id, index->index_id(),
+                                    index->get_index_suffix());
+                            download_idx_file(idx_path);
                         }
-                    } else if (idx_version == 
InvertedIndexStorageFormatPB::V2) {
+                    } else {
                         if (schema_ptr->has_inverted_index()) {
                             auto idx_path = 
storage_resource.value()->remote_idx_v2_path(
                                     *rowset_meta, seg_id);
diff --git a/be/src/cloud/cloud_warm_up_manager.cpp 
b/be/src/cloud/cloud_warm_up_manager.cpp
index 96a25fe8fe5..06d6df11dc4 100644
--- a/be/src/cloud/cloud_warm_up_manager.cpp
+++ b/be/src/cloud/cloud_warm_up_manager.cpp
@@ -147,15 +147,13 @@ void CloudWarmUpManager::handle_jobs() {
                     auto schema_ptr = rs->tablet_schema();
                     auto idx_version = 
schema_ptr->get_inverted_index_storage_format();
                     if (idx_version == InvertedIndexStorageFormatPB::V1) {
-                        for (const auto& index : schema_ptr->indexes()) {
-                            if (index.index_type() == IndexType::INVERTED) {
-                                wait->add_count();
-                                auto idx_path = 
storage_resource.value()->remote_idx_v1_path(
-                                        *rs, seg_id, index.index_id(), 
index.get_index_suffix());
-                                download_idx_file(idx_path);
-                            }
+                        for (const auto& index : 
schema_ptr->inverted_indexes()) {
+                            wait->add_count();
+                            auto idx_path = 
storage_resource.value()->remote_idx_v1_path(
+                                    *rs, seg_id, index->index_id(), 
index->get_index_suffix());
+                            download_idx_file(idx_path);
                         }
-                    } else if (idx_version == 
InvertedIndexStorageFormatPB::V2) {
+                    } else {
                         if (schema_ptr->has_inverted_index()) {
                             wait->add_count();
                             auto idx_path =
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 7226a986905..6a8c88d99df 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -638,7 +638,7 @@ Status Compaction::do_inverted_index_compaction() {
     Status status = Status::OK();
     for (auto&& column_uniq_id : ctx.columns_to_do_index_compaction) {
         auto col = _cur_tablet_schema->column_by_uid(column_uniq_id);
-        const auto* index_meta = _cur_tablet_schema->get_inverted_index(col);
+        const auto* index_meta = _cur_tablet_schema->inverted_index(col);
 
         std::vector<lucene::store::Directory*> 
dest_index_dirs(dest_segment_num);
         try {
@@ -676,15 +676,11 @@ Status Compaction::do_inverted_index_compaction() {
 }
 
 void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) {
-    for (const auto& index : _cur_tablet_schema->indexes()) {
-        if (index.index_type() != IndexType::INVERTED) {
-            continue;
-        }
-
-        auto col_unique_ids = index.col_unique_ids();
+    for (const auto& index : _cur_tablet_schema->inverted_indexes()) {
+        auto col_unique_ids = index->col_unique_ids();
         // check if column unique ids is empty to avoid crash
         if (col_unique_ids.empty()) {
-            LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index[" << 
index.index_id()
+            LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index[" << 
index->index_id()
                          << "] has no column unique id, will skip index 
compaction."
                          << " tablet_schema=" << 
_cur_tablet_schema->dump_full_schema();
             continue;
@@ -699,10 +695,9 @@ void 
Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) {
         bool is_continue = false;
         std::optional<std::map<std::string, std::string>> first_properties;
         for (const auto& rowset : _input_rowsets) {
-            const auto* tablet_index =
-                    rowset->tablet_schema()->get_inverted_index(col_unique_id, 
"");
+            const auto* tablet_index = 
rowset->tablet_schema()->inverted_index(col_unique_id);
             // no inverted index or index id is different from current index id
-            if (tablet_index == nullptr || tablet_index->index_id() != 
index.index_id()) {
+            if (tablet_index == nullptr || tablet_index->index_id() != 
index->index_id()) {
                 is_continue = true;
                 break;
             }
@@ -735,7 +730,7 @@ void 
Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) {
                 return false;
             }
 
-            const auto* index_meta = 
rowset->tablet_schema()->get_inverted_index(col_unique_id, "");
+            const auto* index_meta = 
rowset->tablet_schema()->inverted_index(col_unique_id);
             if (index_meta == nullptr) {
                 LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] 
column_unique_id["
                              << col_unique_id << "] index meta is null, will 
skip index compaction";
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 00c622df59f..f42253a7884 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -249,7 +249,7 @@ void DeltaWriter::_request_slave_tablet_pull_rowset(const 
PNodeInfo& node_info)
     auto tablet_schema = cur_rowset->rowset_meta()->tablet_schema();
     if (!tablet_schema->skip_write_index_on_load()) {
         for (auto& column : tablet_schema->columns()) {
-            const TabletIndex* index_meta = 
tablet_schema->get_inverted_index(*column);
+            const TabletIndex* index_meta = 
tablet_schema->inverted_index(*column);
             if (index_meta) {
                 indices_ids.emplace_back(index_meta->index_id(), 
index_meta->get_index_suffix());
             }
diff --git a/be/src/olap/rowset/beta_rowset.cpp 
b/be/src/olap/rowset/beta_rowset.cpp
index 4b51dcc3530..bbb2ca72b4a 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -81,12 +81,7 @@ Status BetaRowset::get_inverted_index_size(size_t* 
index_size) {
     }
 
     if (_schema->get_inverted_index_storage_format() == 
InvertedIndexStorageFormatPB::V1) {
-        auto indices = _schema->indexes();
-        for (auto& index : indices) {
-            // only get file_size for inverted index
-            if (index.index_type() != IndexType::INVERTED) {
-                continue;
-            }
+        for (const auto& index : _schema->inverted_indexes()) {
             for (int seg_id = 0; seg_id < num_segments(); ++seg_id) {
                 auto seg_path = DORIS_TRY(segment_path(seg_id));
                 int64_t file_size = 0;
@@ -94,7 +89,7 @@ Status BetaRowset::get_inverted_index_size(size_t* 
index_size) {
                 std::string inverted_index_file_path =
                         InvertedIndexDescriptor::get_index_file_path_v1(
                                 
InvertedIndexDescriptor::get_index_file_path_prefix(seg_path),
-                                index.index_id(), index.get_index_suffix());
+                                index->index_id(), index->get_index_suffix());
                 RETURN_IF_ERROR(fs->file_size(inverted_index_file_path, 
&file_size));
                 *index_size += file_size;
             }
@@ -122,7 +117,7 @@ void BetaRowset::clear_inverted_index_cache() {
 
         auto index_path_prefix = 
InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path);
         for (const auto& column : tablet_schema()->columns()) {
-            const TabletIndex* index_meta = 
tablet_schema()->get_inverted_index(*column);
+            const TabletIndex* index_meta = 
tablet_schema()->inverted_index(*column);
             if (index_meta) {
                 auto inverted_index_file_cache_key =
                         InvertedIndexDescriptor::get_index_file_cache_key(
@@ -227,7 +222,7 @@ Status BetaRowset::remove() {
 
         if (_schema->get_inverted_index_storage_format() == 
InvertedIndexStorageFormatPB::V1) {
             for (auto& column : _schema->columns()) {
-                const TabletIndex* index_meta = 
_schema->get_inverted_index(*column);
+                const TabletIndex* index_meta = 
_schema->inverted_index(*column);
                 if (index_meta) {
                     std::string inverted_index_file =
                             InvertedIndexDescriptor::get_index_file_path_v1(
@@ -311,22 +306,19 @@ Status BetaRowset::link_files_to(const std::string& dir, 
RowsetId new_rowset_id,
             return status;
         });
         if (_schema->get_inverted_index_storage_format() == 
InvertedIndexStorageFormatPB::V1) {
-            for (const auto& index : _schema->indexes()) {
-                if (index.index_type() != IndexType::INVERTED) {
-                    continue;
-                }
-                auto index_id = index.index_id();
+            for (const auto& index : _schema->inverted_indexes()) {
+                auto index_id = index->index_id();
                 if (without_index_uids != nullptr && 
without_index_uids->count(index_id)) {
                     continue;
                 }
                 std::string inverted_index_src_file_path =
                         InvertedIndexDescriptor::get_index_file_path_v1(
                                 
InvertedIndexDescriptor::get_index_file_path_prefix(src_path),
-                                index_id, index.get_index_suffix());
+                                index_id, index->get_index_suffix());
                 std::string inverted_index_dst_file_path =
                         InvertedIndexDescriptor::get_index_file_path_v1(
                                 
InvertedIndexDescriptor::get_index_file_path_prefix(dst_path),
-                                index_id, index.get_index_suffix());
+                                index_id, index->get_index_suffix());
                 bool index_file_exists = true;
                 RETURN_IF_ERROR(local_fs->exists(inverted_index_src_file_path, 
&index_file_exists));
                 if (index_file_exists) {
@@ -405,7 +397,7 @@ Status BetaRowset::copy_files_to(const std::string& dir, 
const RowsetId& new_row
         if (_schema->get_inverted_index_storage_format() == 
InvertedIndexStorageFormatPB::V1) {
             for (auto& column : _schema->columns()) {
                 // if (column.has_inverted_index()) {
-                const TabletIndex* index_meta = 
_schema->get_inverted_index(*column);
+                const TabletIndex* index_meta = 
_schema->inverted_index(*column);
                 if (index_meta) {
                     std::string inverted_index_src_file_path =
                             InvertedIndexDescriptor::get_index_file_path_v1(
@@ -464,7 +456,7 @@ Status BetaRowset::upload_to(const StorageResource& 
dest_fs, const RowsetId& new
         if (_schema->get_inverted_index_storage_format() == 
InvertedIndexStorageFormatPB::V1) {
             for (auto& column : _schema->columns()) {
                 // if (column.has_inverted_index()) {
-                const TabletIndex* index_meta = 
_schema->get_inverted_index(*column);
+                const TabletIndex* index_meta = 
_schema->inverted_index(*column);
                 if (index_meta) {
                     std::string remote_inverted_index_file =
                             InvertedIndexDescriptor::get_index_file_path_v1(
@@ -613,14 +605,11 @@ Status BetaRowset::add_to_binlog() {
         linked_success_files.push_back(binlog_file);
 
         if (_schema->get_inverted_index_storage_format() == 
InvertedIndexStorageFormatPB::V1) {
-            for (const auto& index : _schema->indexes()) {
-                if (index.index_type() != IndexType::INVERTED) {
-                    continue;
-                }
-                auto index_id = index.index_id();
+            for (const auto& index : _schema->inverted_indexes()) {
+                auto index_id = index->index_id();
                 auto index_file = 
InvertedIndexDescriptor::get_index_file_path_v1(
                         
InvertedIndexDescriptor::get_index_file_path_prefix(seg_file), index_id,
-                        index.get_index_suffix());
+                        index->get_index_suffix());
                 auto binlog_index_file = (std::filesystem::path(binlog_dir) /
                                           
std::filesystem::path(index_file).filename())
                                                  .string();
@@ -661,7 +650,7 @@ Status BetaRowset::calc_file_crc(uint32_t* crc_value, 
int64_t* file_count) {
         file_paths.emplace_back(seg_path);
         if (_schema->get_inverted_index_storage_format() == 
InvertedIndexStorageFormatPB::V1) {
             for (auto& column : _schema->columns()) {
-                const TabletIndex* index_meta = 
_schema->get_inverted_index(*column);
+                const TabletIndex* index_meta = 
_schema->inverted_index(*column);
                 if (index_meta) {
                     std::string inverted_index_file =
                             InvertedIndexDescriptor::get_index_file_path_v1(
@@ -805,7 +794,7 @@ Status BetaRowset::show_nested_index_file(rapidjson::Value* 
rowset_value,
         } else {
             rapidjson::Value indices(rapidjson::kArrayType);
             for (auto column : _rowset_meta->tablet_schema()->columns()) {
-                const auto* index_meta = 
_rowset_meta->tablet_schema()->get_inverted_index(*column);
+                const auto* index_meta = 
_rowset_meta->tablet_schema()->inverted_index(*column);
                 if (index_meta == nullptr) {
                     continue;
                 }
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp 
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 9cd634ec5ac..97e3bcb1deb 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -540,8 +540,8 @@ Status BetaRowsetWriter::_rename_compacted_indices(int64_t 
begin, int64_t end, u
     }
     // rename remaining inverted index files
     for (auto column : _context.tablet_schema->columns()) {
-        if (_context.tablet_schema->has_inverted_index(*column)) {
-            const auto* index_info = 
_context.tablet_schema->get_inverted_index(*column);
+        if (const auto& index_info = 
_context.tablet_schema->inverted_index(*column);
+            index_info != nullptr) {
             auto index_id = index_info->index_id();
             if (_context.tablet_schema->get_inverted_index_storage_format() ==
                 InvertedIndexStorageFormatPB::V1) {
diff --git a/be/src/olap/rowset/segcompaction.cpp 
b/be/src/olap/rowset/segcompaction.cpp
index 8f4184b7175..413e6150336 100644
--- a/be/src/olap/rowset/segcompaction.cpp
+++ b/be/src/olap/rowset/segcompaction.cpp
@@ -152,8 +152,7 @@ Status 
SegcompactionWorker::_delete_original_segments(uint32_t begin, uint32_t e
         }
         // Delete inverted index files
         for (auto&& column : schema->columns()) {
-            if (schema->has_inverted_index(*column)) {
-                const auto* index_info = schema->get_inverted_index(*column);
+            if (const auto* index_info = schema->inverted_index(*column); 
index_info != nullptr) {
                 auto index_id = index_info->index_id();
                 if (schema->get_inverted_index_storage_format() ==
                     InvertedIndexStorageFormatPB::V1) {
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h 
b/be/src/olap/rowset/segment_v2/column_writer.h
index 62f209db5ad..2d66b940a38 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -63,7 +63,7 @@ struct ColumnWriterOptions {
     bool need_inverted_index = false;
     uint8_t gram_size;
     uint16_t gram_bf_size;
-    std::vector<const TabletIndex*> indexes;
+    std::vector<const TabletIndex*> indexes; // unused
     const TabletIndex* inverted_index = nullptr;
     InvertedIndexFileWriter* inverted_index_file_writer;
     std::string to_string() const {
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index f43ccf37e78..000c1d0c300 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1055,16 +1055,17 @@ Status 
SegmentIterator::_init_inverted_index_iterators() {
         return Status::OK();
     }
     for (auto cid : _schema->column_ids()) {
+        // Use segment’s own index_meta, for compatibility with future 
indexing needs to default to lowercase.
         if (_inverted_index_iterators[cid] == nullptr) {
-            // Not check type valid, since we need to get inverted index for 
related variant type when reading the segment.
-            // If check type valid, we can not get inverted index for variant 
type, and result nullptr.The result for calling
-            // get_inverted_index with variant suffix should return 
corresponding inverted index meta.
-            bool check_inverted_index_by_type = false;
-            // Use segment’s own index_meta, for compatibility with future 
indexing needs to default to lowercase.
+            // In the _opts.tablet_schema, the sub-column type information for 
the variant is FieldType::OLAP_FIELD_TYPE_VARIANT.
+            // This is because the sub-column is created in 
create_materialized_variant_column.
+            // We use this column to locate the metadata for the inverted 
index, which requires a unique_id and path.
+            const auto& column = _opts.tablet_schema->column(cid);
+            int32_t col_unique_id =
+                    column.is_extracted_column() ? column.parent_unique_id() : 
column.unique_id();
             RETURN_IF_ERROR(_segment->new_inverted_index_iterator(
-                    _opts.tablet_schema->column(cid),
-                    
_segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid),
-                                                                 
check_inverted_index_by_type),
+                    column,
+                    _segment->_tablet_schema->inverted_index(col_unique_id, 
column.suffix_path()),
                     _opts, &_inverted_index_iterators[cid]));
         }
     }
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index c532969baa4..5c702df3334 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -218,22 +218,21 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, 
const TabletColumn& co
     if (_opts.write_type == DataWriteType::TYPE_DIRECT && 
schema->skip_write_index_on_load()) {
         skip_inverted_index = true;
     }
-    // indexes for this column
-    opts.indexes = schema->get_indexes_for_column(column);
+
     if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) {
         opts.need_zone_map = false;
         opts.need_bloom_filter = false;
         opts.need_bitmap_index = false;
     }
-    opts.inverted_index_file_writer = _inverted_index_file_writer;
-    for (const auto* index : opts.indexes) {
-        if (!skip_inverted_index && index->index_type() == 
IndexType::INVERTED) {
-            opts.inverted_index = index;
-            opts.need_inverted_index = true;
-            DCHECK(_inverted_index_file_writer != nullptr);
-            // TODO support multiple inverted index
-            break;
-        }
+
+    // indexes for this column
+    if (const auto& index = schema->inverted_index(column);
+        index != nullptr && !skip_inverted_index) {
+        opts.inverted_index = index;
+        opts.need_inverted_index = true;
+        DCHECK(_inverted_index_file_writer != nullptr);
+        opts.inverted_index_file_writer = _inverted_index_file_writer;
+        // TODO support multiple inverted index
     }
 #define CHECK_FIELD_TYPE(TYPE, type_name)                                      
                \
     if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) {                  
                \
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 2cea4c86c09..a0fcd58bfb7 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -210,23 +210,20 @@ Status 
VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
         tablet_schema->skip_write_index_on_load()) {
         skip_inverted_index = true;
     }
-    // indexes for this column
-    opts.indexes = tablet_schema->get_indexes_for_column(column);
+
     if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) {
         opts.need_zone_map = false;
         opts.need_bloom_filter = false;
         opts.need_bitmap_index = false;
     }
-    for (const auto* index : opts.indexes) {
-        if (!skip_inverted_index && index->index_type() == 
IndexType::INVERTED) {
-            opts.inverted_index = index;
-            opts.need_inverted_index = true;
-            DCHECK(_inverted_index_file_writer != nullptr);
-            // TODO support multiple inverted index
-            break;
-        }
+    if (const auto& index = tablet_schema->inverted_index(column);
+        index != nullptr && !skip_inverted_index) {
+        opts.inverted_index = index;
+        opts.need_inverted_index = true;
+        DCHECK(_inverted_index_file_writer != nullptr);
+        opts.inverted_index_file_writer = _inverted_index_file_writer;
+        // TODO support multiple inverted index
     }
-    opts.inverted_index_file_writer = _inverted_index_file_writer;
 
 #define CHECK_FIELD_TYPE(TYPE, type_name)                                      
                \
     if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) {                  
                \
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 2125b508d24..1c06f3a405a 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -81,6 +81,7 @@
 #include "vec/columns/column.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
 #include "vec/core/block.h"
 #include "vec/core/column_with_type_and_name.h"
 #include "vec/exprs/vexpr.h"
@@ -1367,13 +1368,9 @@ Status SchemaChangeJob::parse_request(const 
SchemaChangeParams& sc_params,
             *sc_directly = true;
             return Status::OK();
         } else if (column_mapping->ref_column_idx >= 0) {
-            const auto& column_new = new_tablet_schema->column(i);
-            const auto& column_old = 
base_tablet_schema->column(column_mapping->ref_column_idx);
             // index changed
-            if (column_new.is_bf_column() != column_old.is_bf_column() ||
-                column_new.has_bitmap_index() != column_old.has_bitmap_index() 
||
-                new_tablet_schema->has_inverted_index(column_new) !=
-                        base_tablet_schema->has_inverted_index(column_old)) {
+            if (vectorized::schema_util::has_schema_index_diff(
+                        new_tablet_schema, base_tablet_schema, i, 
column_mapping->ref_column_idx)) {
                 *sc_directly = true;
                 return Status::OK();
             }
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index 2cfa9a8e8b7..67205835b53 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -698,11 +698,8 @@ Status SnapshotManager::_create_snapshot_files(const 
TabletSharedPtr& ref_tablet
 
                 if (tablet_schema.get_inverted_index_storage_format() ==
                     InvertedIndexStorageFormatPB::V1) {
-                    for (const auto& index : tablet_schema.indexes()) {
-                        if (index.index_type() != IndexType::INVERTED) {
-                            continue;
-                        }
-                        auto index_id = index.index_id();
+                    for (const auto& index : tablet_schema.inverted_indexes()) 
{
+                        auto index_id = index->index_id();
                         auto index_file = 
ref_tablet->get_segment_index_filepath(
                                 rowset_id, segment_index, index_id);
                         auto snapshot_segment_index_file_path =
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index ed9ffaa0664..f97f153a860 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1234,7 +1234,7 @@ std::vector<RowsetSharedPtr> 
Tablet::pick_candidate_rowsets_to_build_inverted_in
         std::shared_lock rlock(_meta_lock);
         auto has_alter_inverted_index = [&](RowsetSharedPtr rowset) -> bool {
             for (const auto& index_id : alter_index_uids) {
-                if 
(rowset->tablet_schema()->has_inverted_index_with_index_id(index_id, "")) {
+                if 
(rowset->tablet_schema()->has_inverted_index_with_index_id(index_id)) {
                     return true;
                 }
             }
@@ -2611,12 +2611,9 @@ void Tablet::gc_binlogs(int64_t version) {
         // add binlog segment files and index files
         for (int64_t i = 0; i < num_segments; ++i) {
             
wait_for_deleted_binlog_files.emplace_back(get_segment_filepath(rowset_id, i));
-            for (const auto& index : this->tablet_schema()->indexes()) {
-                if (index.index_type() != IndexType::INVERTED) {
-                    continue;
-                }
+            for (const auto& index : 
this->tablet_schema()->inverted_indexes()) {
                 wait_for_deleted_binlog_files.emplace_back(
-                        get_segment_index_filepath(rowset_id, i, 
index.index_id()));
+                        get_segment_index_filepath(rowset_id, i, 
index->index_id()));
             }
         }
     };
@@ -2765,12 +2762,8 @@ int64_t Tablet::get_inverted_index_file_szie(const 
RowsetMetaSharedPtr& rs_meta)
 
     if (rs_meta->tablet_schema()->get_inverted_index_storage_format() ==
         InvertedIndexStorageFormatPB::V1) {
-        auto indices = rs_meta->tablet_schema()->indexes();
+        const auto& indices = rs_meta->tablet_schema()->inverted_indexes();
         for (auto& index : indices) {
-            // only get file_size for inverted index
-            if (index.index_type() != IndexType::INVERTED) {
-                continue;
-            }
             for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) {
                 std::string segment_path = get_segment_path(rs_meta, seg_id);
                 int64_t file_size = 0;
@@ -2778,7 +2771,7 @@ int64_t Tablet::get_inverted_index_file_szie(const 
RowsetMetaSharedPtr& rs_meta)
                 std::string inverted_index_file_path =
                         InvertedIndexDescriptor::get_index_file_path_v1(
                                 
InvertedIndexDescriptor::get_index_file_path_prefix(segment_path),
-                                index.index_id(), index.get_index_suffix());
+                                index->index_id(), index->get_index_suffix());
                 auto st = fs->file_size(inverted_index_file_path, &file_size);
                 if (!st.ok()) {
                     file_size = 0;
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index ef48c06aa3f..b5479d34016 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -905,14 +905,13 @@ void TabletColumn::append_sparse_column(TabletColumn 
column) {
     _num_sparse_columns++;
 }
 
-void TabletSchema::append_index(TabletIndex index) {
+void TabletSchema::append_index(TabletIndex&& index) {
     _indexes.push_back(std::move(index));
 }
 
 void TabletSchema::update_index(const TabletColumn& col, TabletIndex index) {
     int32_t col_unique_id = col.unique_id();
-    const std::string& suffix_path =
-            col.has_path_info() ? 
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
+    const std::string& suffix_path = escape_for_path_name(col.suffix_path());
     for (size_t i = 0; i < _indexes.size(); i++) {
         for (int32_t id : _indexes[i].col_unique_ids()) {
             if (id == col_unique_id && _indexes[i].get_index_suffix() == 
suffix_path) {
@@ -1352,28 +1351,6 @@ Result<const TabletColumn*> TabletSchema::column(const 
std::string& field_name)
     return _cols[it->second].get();
 }
 
-std::vector<const TabletIndex*> TabletSchema::get_indexes_for_column(
-        const TabletColumn& col) const {
-    std::vector<const TabletIndex*> indexes_for_column;
-    // Some columns (Float, Double, JSONB ...) from the variant do not support 
index, but they are listed in TabltetIndex.
-    if 
(!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
-        return indexes_for_column;
-    }
-    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() 
: col.unique_id();
-    const std::string& suffix_path =
-            col.has_path_info() ? 
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
-    // TODO use more efficient impl
-    for (size_t i = 0; i < _indexes.size(); i++) {
-        for (int32_t id : _indexes[i].col_unique_ids()) {
-            if (id == col_unique_id && _indexes[i].get_index_suffix() == 
suffix_path) {
-                indexes_for_column.push_back(&(_indexes[i]));
-            }
-        }
-    }
-
-    return indexes_for_column;
-}
-
 void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,
                                          const std::vector<TColumn>& 
t_columns) {
     copy_from(tablet_schema);
@@ -1385,49 +1362,17 @@ void TabletSchema::update_tablet_columns(const 
TabletSchema& tablet_schema,
     }
 }
 
-bool TabletSchema::has_inverted_index(const TabletColumn& col) const {
-    // TODO use more efficient impl
-    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() 
: col.unique_id();
-    const std::string& suffix_path =
-            col.has_path_info() ? 
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
+bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
     for (size_t i = 0; i < _indexes.size(); i++) {
-        if (_indexes[i].index_type() == IndexType::INVERTED) {
-            for (int32_t id : _indexes[i].col_unique_ids()) {
-                if (id == col_unique_id && _indexes[i].get_index_suffix() == 
suffix_path) {
-                    return true;
-                }
-            }
-        }
-    }
-
-    return false;
-}
-
-bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id,
-                                                    const std::string& 
suffix_name) const {
-    for (size_t i = 0; i < _indexes.size(); i++) {
-        if (_indexes[i].index_type() == IndexType::INVERTED &&
-            _indexes[i].get_index_suffix() == suffix_name && 
_indexes[i].index_id() == index_id) {
+        if (_indexes[i].index_type() == IndexType::INVERTED && 
_indexes[i].index_id() == index_id) {
             return true;
         }
     }
     return false;
 }
 
-const TabletIndex* TabletSchema::get_inverted_index_with_index_id(
-        int64_t index_id, const std::string& suffix_name) const {
-    for (size_t i = 0; i < _indexes.size(); i++) {
-        if (_indexes[i].index_type() == IndexType::INVERTED &&
-            _indexes[i].get_index_suffix() == suffix_name && 
_indexes[i].index_id() == index_id) {
-            return &(_indexes[i]);
-        }
-    }
-
-    return nullptr;
-}
-
-const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id,
-                                                    const std::string& 
suffix_path) const {
+const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id,
+                                                const std::string& 
suffix_path) const {
     for (size_t i = 0; i < _indexes.size(); i++) {
         if (_indexes[i].index_type() == IndexType::INVERTED) {
             for (int32_t id : _indexes[i].col_unique_ids()) {
@@ -1441,19 +1386,15 @@ const TabletIndex* 
TabletSchema::get_inverted_index(int32_t col_unique_id,
     return nullptr;
 }
 
-const TabletIndex* TabletSchema::get_inverted_index(const TabletColumn& col,
-                                                    bool check_valid) const {
-    // With check_valid set to true by default
+const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const {
     // Some columns(Float, Double, JSONB ...) from the variant do not support 
inverted index
-    if (check_valid && 
!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
+    if 
(!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
         return nullptr;
     }
     // TODO use more efficient impl
     // Use parent id if unique not assigned, this could happend when accessing 
subcolumns of variants
     int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() 
: col.unique_id();
-    const std::string& suffix_path =
-            col.has_path_info() ? 
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
-    return get_inverted_index(col_unique_id, suffix_path);
+    return inverted_index(col_unique_id, 
escape_for_path_name(col.suffix_path()));
 }
 
 bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index e2f90e2716f..f3ff0d694b4 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -163,6 +163,9 @@ public:
     bool is_extracted_column() const {
         return _column_path != nullptr && !_column_path->empty() && 
_parent_col_unique_id > 0;
     };
+    std::string suffix_path() const {
+        return is_extracted_column() ? _column_path->get_path() : "";
+    }
     bool is_nested_subcolumn() const {
         return _column_path != nullptr && _column_path->has_nested_part();
     }
@@ -223,13 +226,16 @@ private:
 
     bool _has_bitmap_index = false;
     bool _visible = true;
-    int32_t _parent_col_unique_id = -1;
+
     std::vector<TabletColumnPtr> _sub_columns;
     uint32_t _sub_column_count = 0;
 
     bool _result_is_nullable = false;
     int _be_exec_version = -1;
-    vectorized::PathInDataPtr _column_path;
+
+    // The extracted sub-columns from "variant" contain the following 
information:
+    int32_t _parent_col_unique_id = -1;     // "variant" -> col_unique_id
+    vectorized::PathInDataPtr _column_path; // the path of the sub-columns 
themselves
 
     // Record information about columns merged into a sparse column within a 
variant
     // `{"id": 100, "name" : "jack", "point" : 3.9}`
@@ -315,7 +321,7 @@ public:
     }
     void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const;
     void append_column(TabletColumn column, ColumnType col_type = 
ColumnType::NORMAL);
-    void append_index(TabletIndex index);
+    void append_index(TabletIndex&& index);
     void update_index(const TabletColumn& column, TabletIndex index);
     void remove_index(int64_t index_id);
     void clear_index();
@@ -385,7 +391,15 @@ public:
     void set_row_store_page_size(long page_size) { _row_store_page_size = 
page_size; }
     long row_store_page_size() const { return _row_store_page_size; }
 
-    const std::vector<TabletIndex>& indexes() const { return _indexes; }
+    const std::vector<const TabletIndex*> inverted_indexes() const {
+        std::vector<const TabletIndex*> inverted_indexes;
+        for (const auto& index : _indexes) {
+            if (index.index_type() == IndexType::INVERTED) {
+                inverted_indexes.emplace_back(&index);
+            }
+        }
+        return inverted_indexes;
+    }
     bool has_inverted_index() const {
         for (const auto& index : _indexes) {
             if (index.index_type() == IndexType::INVERTED) {
@@ -394,17 +408,15 @@ public:
         }
         return false;
     }
-    std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn& 
col) const;
-    bool has_inverted_index(const TabletColumn& col) const;
-    bool has_inverted_index_with_index_id(int64_t index_id, const std::string& 
suffix_path) const;
-    const TabletIndex* get_inverted_index_with_index_id(int64_t index_id,
-                                                        const std::string& 
suffix_name) const;
-    // check_valid: check if this column supports inverted index
+    bool has_inverted_index_with_index_id(int64_t index_id) const;
+    // Check whether this column supports inverted index
     // Some columns (Float, Double, JSONB ...) from the variant do not support 
index, but they are listed in TabletIndex.
-    // If returned, the index file will not be found.
-    const TabletIndex* get_inverted_index(const TabletColumn& col, bool 
check_valid = true) const;
-    const TabletIndex* get_inverted_index(int32_t col_unique_id,
-                                          const std::string& suffix_path) 
const;
+    const TabletIndex* inverted_index(const TabletColumn& col) const;
+
+    // Regardless of whether this column supports inverted index
+    // TabletIndex information will be returned as long as it exists.
+    const TabletIndex* inverted_index(int32_t col_unique_id,
+                                      const std::string& suffix_path = "") const;
     bool has_ngram_bf_index(int32_t col_unique_id) const;
     const TabletIndex* get_ngram_bf_index(int32_t col_unique_id) const;
     void update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& 
indexes);
diff --git a/be/src/olap/task/engine_storage_migration_task.cpp 
b/be/src/olap/task/engine_storage_migration_task.cpp
index 21be34a334d..a300e6e0f09 100644
--- a/be/src/olap/task/engine_storage_migration_task.cpp
+++ b/be/src/olap/task/engine_storage_migration_task.cpp
@@ -407,11 +407,8 @@ Status 
EngineStorageMigrationTask::_copy_index_and_data_files(
 
             if (tablet_schema.get_inverted_index_storage_format() ==
                 InvertedIndexStorageFormatPB::V1) {
-                for (const auto& index : tablet_schema.indexes()) {
-                    if (index.index_type() != IndexType::INVERTED) {
-                        continue;
-                    }
-                    auto index_id = index.index_id();
+                for (const auto& index : tablet_schema.inverted_indexes()) {
+                    auto index_id = index->index_id();
                     auto index_file =
                             _tablet->get_segment_index_filepath(rowset_id, 
segment_index, index_id);
                     auto snapshot_segment_index_file_path =
diff --git a/be/src/olap/task/index_builder.cpp 
b/be/src/olap/task/index_builder.cpp
index 69ab95770f7..975920a437e 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -106,7 +106,7 @@ Status IndexBuilder::update_inverted_index_info() {
                     }
                 }
                 auto column = output_rs_tablet_schema->column(column_idx);
-                const auto* index_meta = 
output_rs_tablet_schema->get_inverted_index(column);
+                const auto* index_meta = 
output_rs_tablet_schema->inverted_index(column);
                 if (index_meta == nullptr) {
                     LOG(ERROR) << "failed to find column: " << column_name
                                << " index_id: " << t_inverted_index.index_id;
@@ -142,12 +142,7 @@ Status IndexBuilder::update_inverted_index_info() {
                     return Status::Error<ErrorCode::INTERNAL_ERROR>(
                             "indexes count cannot be negative");
                 }
-                int32_t indexes_size = 0;
-                for (auto index : output_rs_tablet_schema->indexes()) {
-                    if (index.index_type() == IndexType::INVERTED) {
-                        indexes_size++;
-                    }
-                }
+                int32_t indexes_size = 
output_rs_tablet_schema->inverted_indexes().size();
                 if (indexes_count != indexes_size) {
                     return Status::Error<ErrorCode::INTERNAL_ERROR>(
                             "indexes count not equal to expected");
@@ -165,11 +160,11 @@ Status IndexBuilder::update_inverted_index_info() {
                     LOG(WARNING) << "referenced column was missing. "
                                  << "[column=" << t_inverted_index.columns[0]
                                  << " referenced_column=" << column_uid << "]";
-                    output_rs_tablet_schema->append_index(index);
+                    output_rs_tablet_schema->append_index(std::move(index));
                     continue;
                 }
                 const TabletColumn& col = 
output_rs_tablet_schema->column_by_uid(column_uid);
-                const TabletIndex* exist_index = 
output_rs_tablet_schema->get_inverted_index(col);
+                const TabletIndex* exist_index = 
output_rs_tablet_schema->inverted_index(col);
                 if (exist_index && exist_index->index_id() != 
index.index_id()) {
                     LOG(WARNING) << fmt::format(
                             "column: {} has a exist inverted index, but the 
index id not equal "
@@ -179,7 +174,7 @@ Status IndexBuilder::update_inverted_index_info() {
                     without_index_uids.insert(exist_index->index_id());
                     
output_rs_tablet_schema->remove_index(exist_index->index_id());
                 }
-                output_rs_tablet_schema->append_index(index);
+                output_rs_tablet_schema->append_index(std::move(index));
             }
         }
         // construct input rowset reader
@@ -399,14 +394,15 @@ Status 
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
                     }
                 }
                 auto column = output_rowset_schema->column(column_idx);
+                // variant column is not supported for building index
                 if 
(!InvertedIndexColumnWriter::check_support_inverted_index(column)) {
                     continue;
                 }
-                
DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id, ""));
+                
DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id));
                 _olap_data_convertor->add_column_data_convertor(column);
                 return_columns.emplace_back(column_idx);
                 std::unique_ptr<Field> field(FieldFactory::create(column));
-                const auto* index_meta = 
output_rowset_schema->get_inverted_index(column);
+                const auto* index_meta = 
output_rowset_schema->inverted_index(column);
                 std::unique_ptr<segment_v2::InvertedIndexColumnWriter> 
inverted_index_builder;
                 try {
                     
RETURN_IF_ERROR(segment_v2::InvertedIndexColumnWriter::create(
diff --git a/be/src/service/backend_service.cpp 
b/be/src/service/backend_service.cpp
index abdef513296..d74a9cd2e0b 100644
--- a/be/src/service/backend_service.cpp
+++ b/be/src/service/backend_service.cpp
@@ -353,11 +353,8 @@ void _ingest_binlog(StorageEngine& engine, 
IngestBinlogArg* arg) {
     std::vector<std::string> segment_index_file_names;
     auto tablet_schema = rowset_meta->tablet_schema();
     if (tablet_schema->get_inverted_index_storage_format() == 
InvertedIndexStorageFormatPB::V1) {
-        for (const auto& index : tablet_schema->indexes()) {
-            if (index.index_type() != IndexType::INVERTED) {
-                continue;
-            }
-            auto index_id = index.index_id();
+        for (const auto& index : tablet_schema->inverted_indexes()) {
+            auto index_id = index->index_id();
             for (int64_t segment_index = 0; segment_index < num_segments; 
++segment_index) {
                 auto get_segment_index_file_size_url = fmt::format(
                         
"{}?method={}&tablet_id={}&rowset_id={}&segment_index={}&segment_index_id={"
@@ -379,7 +376,7 @@ void _ingest_binlog(StorageEngine& engine, IngestBinlogArg* 
arg) {
                                            
rowset_meta->rowset_id().to_string(), segment_index);
                 
segment_index_file_names.push_back(InvertedIndexDescriptor::get_index_file_path_v1(
                         
InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), index_id,
-                        index.get_index_suffix()));
+                        index->get_index_suffix()));
 
                 status = HttpClient::execute_with_retry(max_retry, 1,
                                                         
get_segment_index_file_size_cb);
diff --git a/be/src/vec/common/schema_util.cpp 
b/be/src/vec/common/schema_util.cpp
index adaee6e9fe6..9692c02fda3 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -360,6 +360,7 @@ void update_least_sparse_column(const 
std::vector<TabletSchemaSPtr>& schemas,
 
 void inherit_column_attributes(const TabletColumn& source, TabletColumn& 
target,
                                TabletSchemaSPtr& target_schema) {
+    DCHECK(target.is_extracted_column());
     if (target.type() != FieldType::OLAP_FIELD_TYPE_TINYINT &&
         target.type() != FieldType::OLAP_FIELD_TYPE_ARRAY &&
         target.type() != FieldType::OLAP_FIELD_TYPE_DOUBLE &&
@@ -368,18 +369,18 @@ void inherit_column_attributes(const TabletColumn& 
source, TabletColumn& target,
         target.set_is_bf_column(source.is_bf_column());
     }
     target.set_aggregation_method(source.aggregation());
-    const auto* source_index_meta = 
target_schema->get_inverted_index(source.unique_id(), "");
+    const auto* source_index_meta = 
target_schema->inverted_index(source.unique_id());
     if (source_index_meta != nullptr) {
         // add index meta
         TabletIndex index_info = *source_index_meta;
         
index_info.set_escaped_escaped_index_suffix_path(target.path_info_ptr()->get_path());
-        // get_inverted_index: No need to check, just inherit directly
-        const auto* target_index_meta = 
target_schema->get_inverted_index(target, false);
+        const auto* target_index_meta = target_schema->inverted_index(
+                target.parent_unique_id(), target.path_info_ptr()->get_path());
         if (target_index_meta != nullptr) {
             // already exist
             target_schema->update_index(target, index_info);
         } else {
-            target_schema->append_index(index_info);
+            target_schema->append_index(std::move(index_info));
         }
     }
 }
@@ -591,4 +592,20 @@ Status extract(ColumnPtr source, const PathInData& path, 
MutableColumnPtr& dst)
     return Status::OK();
 }
 
+bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* old_schema,
+                           int32_t new_col_idx, int32_t old_col_idx) {
+    const auto& column_new = new_schema->column(new_col_idx);
+    const auto& column_old = old_schema->column(old_col_idx);
+
+    if (column_new.is_bf_column() != column_old.is_bf_column() ||
+        column_new.has_bitmap_index() != column_old.has_bitmap_index()) {
+        return true;
+    }
+
+    bool new_schema_has_inverted_index = new_schema->inverted_index(column_new);
+    bool old_schema_has_inverted_index = old_schema->inverted_index(column_old);
+
+    return new_schema_has_inverted_index != old_schema_has_inverted_index;
+}
+
 } // namespace doris::vectorized::schema_util
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index 080e6331dc1..8ceb97a9156 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -121,4 +121,7 @@ Status extract(ColumnPtr source, const PathInData& path, 
MutableColumnPtr& dst);
 
 std::string dump_column(DataTypePtr type, const ColumnPtr& col);
 
+bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* 
old_schema,
+                           int32_t new_col_idx, int32_t old_col_idx);
+
 } // namespace  doris::vectorized::schema_util
diff --git 
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
 
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
index 922a77fcaa4..5e3370847e9 100644
--- 
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
+++ 
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
@@ -406,7 +406,7 @@ TEST_F(IndexCompactionTest, write_index_test) {
 
         // read col key
         const auto& key = _tablet_schema->column_by_uid(0);
-        const auto* key_index = _tablet_schema->get_inverted_index(key);
+        const auto* key_index = _tablet_schema->inverted_index(key);
         EXPECT_TRUE(key_index != nullptr);
         std::vector<int> query_data {99, 66, 56, 87, 85, 96, 20000};
         std::vector<int> query_result {21, 25, 22, 18, 14, 18, 0};
@@ -414,7 +414,7 @@ TEST_F(IndexCompactionTest, write_index_test) {
 
         // read col v3
         const auto& v3_column = _tablet_schema->column_by_uid(3);
-        const auto* v3_index = _tablet_schema->get_inverted_index(v3_column);
+        const auto* v3_index = _tablet_schema->inverted_index(v3_column);
         EXPECT_TRUE(v3_index != nullptr);
         std::vector<int> query_data3 {99, 66, 56, 87, 85, 96, 10000};
         std::vector<int> query_result3 {12, 20, 25, 23, 16, 24, 0};
@@ -422,7 +422,7 @@ TEST_F(IndexCompactionTest, write_index_test) {
 
         // read col v1
         const auto& v1_column = _tablet_schema->column_by_uid(1);
-        const auto* v1_index = _tablet_schema->get_inverted_index(v1_column);
+        const auto* v1_index = _tablet_schema->inverted_index(v1_column);
         EXPECT_TRUE(v1_index != nullptr);
         std::vector<std::string> query_data1 {"good", "maybe", "great", 
"null"};
         std::vector<int> query_result1 {197, 191, 194, 0};
@@ -431,7 +431,7 @@ TEST_F(IndexCompactionTest, write_index_test) {
 
         // read col v2
         const auto& v2_column = _tablet_schema->column_by_uid(2);
-        const auto* v2_index = _tablet_schema->get_inverted_index(v2_column);
+        const auto* v2_index = _tablet_schema->inverted_index(v2_column);
         EXPECT_TRUE(v2_index != nullptr);
         std::vector<std::string> query_data2 {"musicstream.com", "http", 
"https", "null"};
         std::vector<int> query_result2 {191, 799, 1201, 0};
diff --git 
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
 
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
index 4c77b71d66d..321d43fa872 100644
--- 
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
+++ 
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
@@ -498,7 +498,7 @@ protected:
 
         // read col key
         const auto& key = _tablet_schema->column_by_uid(0);
-        const auto* key_index = _tablet_schema->get_inverted_index(key);
+        const auto* key_index = _tablet_schema->inverted_index(key);
         EXPECT_TRUE(key_index != nullptr);
         std::vector<int> query_data {99, 66, 56, 87, 85, 96, 20000};
         std::vector<int> query_result {19, 21, 21, 16, 14, 18, 0};
@@ -506,7 +506,7 @@ protected:
 
         // read col v3
         const auto& v3_column = _tablet_schema->column_by_uid(3);
-        const auto* v3_index = _tablet_schema->get_inverted_index(v3_column);
+        const auto* v3_index = _tablet_schema->inverted_index(v3_column);
         EXPECT_TRUE(v3_index != nullptr);
         std::vector<int> query_data3 {99, 66, 56, 87, 85, 96, 10000};
         std::vector<int> query_result3 {12, 18, 22, 21, 16, 20, 0};
@@ -514,7 +514,7 @@ protected:
 
         // read col v1
         const auto& v1_column = _tablet_schema->column_by_uid(1);
-        const auto* v1_index = _tablet_schema->get_inverted_index(v1_column);
+        const auto* v1_index = _tablet_schema->inverted_index(v1_column);
         EXPECT_TRUE(v1_index != nullptr);
         std::vector<std::string> query_data1 {"good", "maybe", "great", 
"null"};
         std::vector<int> query_result1 {197, 191, 0, 0};
@@ -523,7 +523,7 @@ protected:
 
         // read col v2
         const auto& v2_column = _tablet_schema->column_by_uid(2);
-        const auto* v2_index = _tablet_schema->get_inverted_index(v2_column);
+        const auto* v2_index = _tablet_schema->inverted_index(v2_column);
         EXPECT_TRUE(v2_index != nullptr);
         std::vector<std::string> query_data2 {"musicstream.com", "http", 
"https", "null"};
         std::vector<int> query_result2 {176, 719, 1087, 0};
diff --git a/be/test/olap/tablet_index_test.cpp 
b/be/test/olap/tablet_index_test.cpp
new file mode 100644
index 00000000000..7842f9af18d
--- /dev/null
+++ b/be/test/olap/tablet_index_test.cpp
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "olap/tablet_schema.h"
+#include "vec/common/schema_util.h"
+
+namespace doris {
+
+class TabletIndexTest : public testing::Test {};
+
+void construct_column(ColumnPB* column_pb, TabletIndexPB* tablet_index, int64_t index_id,
+                      const std::string& index_name, int32_t col_unique_id,
+                      const std::string& column_type, const std::string& column_name,
+                      const IndexType& index_type, bool is_bf_column) {
+    column_pb->set_unique_id(col_unique_id);
+    column_pb->set_name(column_name);
+    column_pb->set_type(column_type);
+    column_pb->set_is_nullable(true);
+    column_pb->set_is_bf_column(is_bf_column);
+    tablet_index->set_index_id(index_id);
+    tablet_index->set_index_name(index_name);
+    tablet_index->set_index_type(index_type);
+    tablet_index->add_col_unique_id(col_unique_id);
+    if (index_type == IndexType::NGRAM_BF) {
+        auto* properties = tablet_index->mutable_properties();
+        (*properties)["gram_size"] = "5";
+        (*properties)["bf_size"] = "1024";
+    }
+}
+
+TEST_F(TabletIndexTest, test_inverted_index) {
+    TabletSchemaPB schema_pb;
+    schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+
+    construct_column(schema_pb.add_column(), schema_pb.add_index(), 10000, "key_index", 0, "INT",
+                     "key", IndexType::INVERTED, true);
+    construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001, "v1_index", 1, "STRING",
+                     "v1", IndexType::INVERTED, false);
+    construct_column(schema_pb.add_column(), schema_pb.add_index(), 10002, "v2_index", 2, "STRING",
+                     "v2", IndexType::NGRAM_BF, true);
+
+    TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+    tablet_schema->init_from_pb(schema_pb);
+
+    EXPECT_TRUE(tablet_schema->has_inverted_index());
+    EXPECT_EQ(tablet_schema->inverted_indexes().size(), 2);
+    EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(0)) != nullptr);
+    EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(1)) != nullptr);
+    EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(2)) == nullptr);
+    EXPECT_TRUE(tablet_schema->inverted_index(3) == nullptr);
+    EXPECT_TRUE(tablet_schema->inverted_index(4, "v1.a") == nullptr);
+}
+
+TEST_F(TabletIndexTest, test_schema_index_diff) {
+    TabletSchemaPB new_schema_pb;
+    new_schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    new_schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+
+    construct_column(new_schema_pb.add_column(), new_schema_pb.add_index(), 10000, "key_index", 0,
+                     "INT", "key", IndexType::INVERTED, true);
+    construct_column(new_schema_pb.add_column(), new_schema_pb.add_index(), 10001, "v1_index", 1,
+                     "STRING", "v1", IndexType::INVERTED, false);
+    construct_column(new_schema_pb.add_column(), new_schema_pb.add_index(), 10002, "v2_index", 2,
+                     "STRING", "v2", IndexType::NGRAM_BF, true);
+
+    TabletSchemaSPtr new_tablet_schema = std::make_shared<TabletSchema>();
+    new_tablet_schema->init_from_pb(new_schema_pb);
+
+    TabletSchemaPB old_schema_pb;
+    old_schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    old_schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+
+    construct_column(old_schema_pb.add_column(), old_schema_pb.add_index(), 10000, "key_index", 0,
+                     "INT", "key", IndexType::INVERTED, true);
+    construct_column(old_schema_pb.add_column(), old_schema_pb.add_index(), 10001, "v1_index", 1,
+                     "STRING", "v1", IndexType::INVERTED, true);
+    construct_column(old_schema_pb.add_column(), old_schema_pb.add_index(), 10002, "v2_index", 2,
+                     "STRING", "v2", IndexType::INVERTED, true);
+
+    TabletSchemaSPtr old_tablet_schema = std::make_shared<TabletSchema>();
+    old_tablet_schema->init_from_pb(old_schema_pb);
+
+    EXPECT_FALSE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(),
+                                                                old_tablet_schema.get(), 0, 0));
+    EXPECT_TRUE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(),
+                                                               old_tablet_schema.get(), 1, 1));
+    EXPECT_TRUE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(),
+                                                               old_tablet_schema.get(), 2, 2));
+}
+
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to