This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 9c672fb1856 [Refactor](inverted index) refactor inverted index interface in tablet schema (#43003) (#43280) (#43669) 9c672fb1856 is described below commit 9c672fb1856369a5d0d742e146ccbf6923a43728 Author: Sun Chenyang <csun5...@gmail.com> AuthorDate: Tue Nov 12 17:54:44 2024 +0800 [Refactor](inverted index) refactor inverted index interface in tablet schema (#43003) (#43280) (#43669) pick from master #43003,#43280 --- be/src/cloud/cloud_meta_mgr.cpp | 8 +- be/src/cloud/cloud_tablet.cpp | 14 ++- be/src/cloud/cloud_warm_up_manager.cpp | 14 ++- be/src/olap/compaction.cpp | 19 ++-- be/src/olap/delta_writer.cpp | 2 +- be/src/olap/rowset/beta_rowset.cpp | 41 +++----- be/src/olap/rowset/beta_rowset_writer.cpp | 4 +- be/src/olap/rowset/segcompaction.cpp | 3 +- be/src/olap/rowset/segment_v2/column_writer.h | 2 +- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 17 ++-- be/src/olap/rowset/segment_v2/segment_writer.cpp | 21 ++-- .../rowset/segment_v2/vertical_segment_writer.cpp | 19 ++-- be/src/olap/schema_change.cpp | 9 +- be/src/olap/snapshot_manager.cpp | 7 +- be/src/olap/tablet.cpp | 17 +--- be/src/olap/tablet_schema.cpp | 77 ++------------- be/src/olap/tablet_schema.h | 40 +++++--- be/src/olap/task/engine_storage_migration_task.cpp | 7 +- be/src/olap/task/index_builder.cpp | 20 ++-- be/src/service/backend_service.cpp | 9 +- be/src/vec/common/schema_util.cpp | 25 ++++- be/src/vec/common/schema_util.h | 3 + .../compaction/index_compaction_test.cpp | 8 +- .../index_compaction_with_deleted_term.cpp | 8 +- be/test/olap/tablet_index_test.cpp | 108 +++++++++++++++++++++ 25 files changed, 266 insertions(+), 236 deletions(-) diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index fefdb65e44b..0beeea42bd6 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -1225,12 +1225,8 @@ int64_t CloudMetaMgr::get_inverted_index_file_szie(const RowsetMeta& rs_meta) { } if (rs_meta.tablet_schema()->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - auto indices = rs_meta.tablet_schema()->indexes(); + const auto& indices = rs_meta.tablet_schema()->inverted_indexes(); for (auto& index : indices) { - // only get file_size for inverted index - if (index.index_type() != IndexType::INVERTED) { - continue; - } for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) { std::string segment_path = StorageResource().remote_segment_path( rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id); @@ -1239,7 +1235,7 @@ int64_t CloudMetaMgr::get_inverted_index_file_szie(const RowsetMeta& rs_meta) { std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), - index.index_id(), index.get_index_suffix()); + index->index_id(), index->get_index_suffix()); auto st = fs->file_size(inverted_index_file_path, &file_size); if (!st.ok()) { file_size = 0; diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index a8b5fcde662..c7a85fc820d 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -285,15 +285,13 @@ void CloudTablet::add_rowsets(std::vector<RowsetSharedPtr> to_add, bool version_ auto schema_ptr = rowset_meta->tablet_schema(); auto idx_version = schema_ptr->get_inverted_index_storage_format(); if (idx_version == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : schema_ptr->indexes()) { - if (index.index_type() == IndexType::INVERTED) { - auto idx_path = storage_resource.value()->remote_idx_v1_path( - *rowset_meta, seg_id, index.index_id(), - index.get_index_suffix()); - download_idx_file(idx_path); - } + for (const auto& index : schema_ptr->inverted_indexes()) { + auto idx_path = storage_resource.value()->remote_idx_v1_path( + *rowset_meta, seg_id, index->index_id(), + index->get_index_suffix()); + download_idx_file(idx_path); } - } else if (idx_version == InvertedIndexStorageFormatPB::V2) { + } else { if (schema_ptr->has_inverted_index()) { auto idx_path = storage_resource.value()->remote_idx_v2_path( *rowset_meta, seg_id); diff --git a/be/src/cloud/cloud_warm_up_manager.cpp b/be/src/cloud/cloud_warm_up_manager.cpp index 96a25fe8fe5..06d6df11dc4 100644 --- a/be/src/cloud/cloud_warm_up_manager.cpp +++ b/be/src/cloud/cloud_warm_up_manager.cpp @@ -147,15 +147,13 @@ void CloudWarmUpManager::handle_jobs() { auto schema_ptr = rs->tablet_schema(); auto idx_version = schema_ptr->get_inverted_index_storage_format(); if (idx_version == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : schema_ptr->indexes()) { - if (index.index_type() == IndexType::INVERTED) { - wait->add_count(); - auto idx_path = storage_resource.value()->remote_idx_v1_path( - *rs, seg_id, index.index_id(), index.get_index_suffix()); - download_idx_file(idx_path); - } + for (const auto& index : schema_ptr->inverted_indexes()) { + wait->add_count(); + auto idx_path = storage_resource.value()->remote_idx_v1_path( + *rs, seg_id, index->index_id(), index->get_index_suffix()); + download_idx_file(idx_path); } - } else if (idx_version == InvertedIndexStorageFormatPB::V2) { + } else { if (schema_ptr->has_inverted_index()) { wait->add_count(); auto idx_path = diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 7226a986905..6a8c88d99df 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -638,7 +638,7 @@ Status Compaction::do_inverted_index_compaction() { Status status = Status::OK(); for (auto&& column_uniq_id : ctx.columns_to_do_index_compaction) { auto col = _cur_tablet_schema->column_by_uid(column_uniq_id); - const auto* index_meta = _cur_tablet_schema->get_inverted_index(col); + const auto* index_meta = _cur_tablet_schema->inverted_index(col); std::vector<lucene::store::Directory*> dest_index_dirs(dest_segment_num); try { @@ -676,15 +676,11 @@ Status Compaction::do_inverted_index_compaction() { } void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { - for (const auto& index : _cur_tablet_schema->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - - auto col_unique_ids = index.col_unique_ids(); + for (const auto& index : _cur_tablet_schema->inverted_indexes()) { + auto col_unique_ids = index->col_unique_ids(); // check if column unique ids is empty to avoid crash if (col_unique_ids.empty()) { - LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index[" << index.index_id() + LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index[" << index->index_id() << "] has no column unique id, will skip index compaction." << " tablet_schema=" << _cur_tablet_schema->dump_full_schema(); continue; @@ -699,10 +695,9 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { bool is_continue = false; std::optional<std::map<std::string, std::string>> first_properties; for (const auto& rowset : _input_rowsets) { - const auto* tablet_index = - rowset->tablet_schema()->get_inverted_index(col_unique_id, ""); + const auto* tablet_index = rowset->tablet_schema()->inverted_index(col_unique_id); // no inverted index or index id is different from current index id - if (tablet_index == nullptr || tablet_index->index_id() != index.index_id()) { + if (tablet_index == nullptr || tablet_index->index_id() != index->index_id()) { is_continue = true; break; } @@ -735,7 +730,7 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { return false; } - const auto* index_meta = rowset->tablet_schema()->get_inverted_index(col_unique_id, ""); + const auto* index_meta = rowset->tablet_schema()->inverted_index(col_unique_id); if (index_meta == nullptr) { LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" << col_unique_id << "] index meta is null, will skip index compaction"; diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 00c622df59f..f42253a7884 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -249,7 +249,7 @@ void DeltaWriter::_request_slave_tablet_pull_rowset(const PNodeInfo& node_info) auto tablet_schema = cur_rowset->rowset_meta()->tablet_schema(); if (!tablet_schema->skip_write_index_on_load()) { for (auto& column : tablet_schema->columns()) { - const TabletIndex* index_meta = tablet_schema->get_inverted_index(*column); + const TabletIndex* index_meta = tablet_schema->inverted_index(*column); if (index_meta) { indices_ids.emplace_back(index_meta->index_id(), index_meta->get_index_suffix()); } diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 4b51dcc3530..bbb2ca72b4a 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -81,12 +81,7 @@ Status BetaRowset::get_inverted_index_size(size_t* index_size) { } if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - auto indices = _schema->indexes(); - for (auto& index : indices) { - // only get file_size for inverted index - if (index.index_type() != IndexType::INVERTED) { - continue; - } + for (const auto& index : _schema->inverted_indexes()) { for (int seg_id = 0; seg_id < num_segments(); ++seg_id) { auto seg_path = DORIS_TRY(segment_path(seg_id)); int64_t file_size = 0; @@ -94,7 +89,7 @@ Status BetaRowset::get_inverted_index_size(size_t* index_size) { std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(seg_path), - index.index_id(), index.get_index_suffix()); + index->index_id(), index->get_index_suffix()); RETURN_IF_ERROR(fs->file_size(inverted_index_file_path, &file_size)); *index_size += file_size; } @@ -122,7 +117,7 @@ void BetaRowset::clear_inverted_index_cache() { auto index_path_prefix = InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path); for (const auto& column : tablet_schema()->columns()) { - const TabletIndex* index_meta = tablet_schema()->get_inverted_index(*column); + const TabletIndex* index_meta = tablet_schema()->inverted_index(*column); if (index_meta) { auto inverted_index_file_cache_key = InvertedIndexDescriptor::get_index_file_cache_key( @@ -227,7 +222,7 @@ Status BetaRowset::remove() { if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { for (auto& column : _schema->columns()) { - const TabletIndex* index_meta = _schema->get_inverted_index(*column); + const TabletIndex* index_meta = _schema->inverted_index(*column); if (index_meta) { std::string inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v1( @@ -311,22 +306,19 @@ Status BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset_id, return status; }); if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : _schema->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - auto index_id = index.index_id(); + for (const auto& index : _schema->inverted_indexes()) { + auto index_id = index->index_id(); if (without_index_uids != nullptr && without_index_uids->count(index_id)) { continue; } std::string inverted_index_src_file_path = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(src_path), - index_id, index.get_index_suffix()); + index_id, index->get_index_suffix()); std::string inverted_index_dst_file_path = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(dst_path), - index_id, index.get_index_suffix()); + index_id, index->get_index_suffix()); bool index_file_exists = true; RETURN_IF_ERROR(local_fs->exists(inverted_index_src_file_path, &index_file_exists)); if (index_file_exists) { @@ -405,7 +397,7 @@ Status BetaRowset::copy_files_to(const std::string& dir, const RowsetId& new_row if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { for (auto& column : _schema->columns()) { // if (column.has_inverted_index()) { - const TabletIndex* index_meta = _schema->get_inverted_index(*column); + const TabletIndex* index_meta = _schema->inverted_index(*column); if (index_meta) { std::string inverted_index_src_file_path = InvertedIndexDescriptor::get_index_file_path_v1( @@ -464,7 +456,7 @@ Status BetaRowset::upload_to(const StorageResource& dest_fs, const RowsetId& new if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { for (auto& column : _schema->columns()) { // if (column.has_inverted_index()) { - const TabletIndex* index_meta = _schema->get_inverted_index(*column); + const TabletIndex* index_meta = _schema->inverted_index(*column); if (index_meta) { std::string remote_inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v1( @@ -613,14 +605,11 @@ Status BetaRowset::add_to_binlog() { linked_success_files.push_back(binlog_file); if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : _schema->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - auto index_id = index.index_id(); + for (const auto& index : _schema->inverted_indexes()) { + auto index_id = index->index_id(); auto index_file = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(seg_file), index_id, - index.get_index_suffix()); + index->get_index_suffix()); auto binlog_index_file = (std::filesystem::path(binlog_dir) / std::filesystem::path(index_file).filename()) .string(); @@ -661,7 +650,7 @@ Status BetaRowset::calc_file_crc(uint32_t* crc_value, int64_t* file_count) { file_paths.emplace_back(seg_path); if (_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { for (auto& column : _schema->columns()) { - const TabletIndex* index_meta = _schema->get_inverted_index(*column); + const TabletIndex* index_meta = _schema->inverted_index(*column); if (index_meta) { std::string inverted_index_file = InvertedIndexDescriptor::get_index_file_path_v1( @@ -805,7 +794,7 @@ Status BetaRowset::show_nested_index_file(rapidjson::Value* rowset_value, } else { rapidjson::Value indices(rapidjson::kArrayType); for (auto column : _rowset_meta->tablet_schema()->columns()) { - const auto* index_meta = _rowset_meta->tablet_schema()->get_inverted_index(*column); + const auto* index_meta = _rowset_meta->tablet_schema()->inverted_index(*column); if (index_meta == nullptr) { continue; } diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 9cd634ec5ac..97e3bcb1deb 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -540,8 +540,8 @@ Status BetaRowsetWriter::_rename_compacted_indices(int64_t begin, int64_t end, u } // rename remaining inverted index files for (auto column : _context.tablet_schema->columns()) { - if (_context.tablet_schema->has_inverted_index(*column)) { - const auto* index_info = _context.tablet_schema->get_inverted_index(*column); + if (const auto& index_info = _context.tablet_schema->inverted_index(*column); + index_info != nullptr) { auto index_id = index_info->index_id(); if (_context.tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { diff --git a/be/src/olap/rowset/segcompaction.cpp b/be/src/olap/rowset/segcompaction.cpp index 8f4184b7175..413e6150336 100644 --- a/be/src/olap/rowset/segcompaction.cpp +++ b/be/src/olap/rowset/segcompaction.cpp @@ -152,8 +152,7 @@ Status SegcompactionWorker::_delete_original_segments(uint32_t begin, uint32_t e } // Delete inverted index files for (auto&& column : schema->columns()) { - if (schema->has_inverted_index(*column)) { - const auto* index_info = schema->get_inverted_index(*column); + if (const auto* index_info = schema->inverted_index(*column); index_info != nullptr) { auto index_id = index_info->index_id(); if (schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 62f209db5ad..2d66b940a38 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -63,7 +63,7 @@ struct ColumnWriterOptions { bool need_inverted_index = false; uint8_t gram_size; uint16_t gram_bf_size; - std::vector<const TabletIndex*> indexes; + std::vector<const TabletIndex*> indexes; // unused const TabletIndex* inverted_index = nullptr; InvertedIndexFileWriter* inverted_index_file_writer; std::string to_string() const { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index f43ccf37e78..000c1d0c300 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1055,16 +1055,17 @@ Status SegmentIterator::_init_inverted_index_iterators() { return Status::OK(); } for (auto cid : _schema->column_ids()) { + // Use segment’s own index_meta, for compatibility with future indexing needs to default to lowercase. if (_inverted_index_iterators[cid] == nullptr) { - // Not check type valid, since we need to get inverted index for related variant type when reading the segment. - // If check type valid, we can not get inverted index for variant type, and result nullptr.The result for calling - // get_inverted_index with variant suffix should return corresponding inverted index meta. - bool check_inverted_index_by_type = false; - // Use segment’s own index_meta, for compatibility with future indexing needs to default to lowercase. + // In the _opts.tablet_schema, the sub-column type information for the variant is FieldType::OLAP_FIELD_TYPE_VARIANT. + // This is because the sub-column is created in create_materialized_variant_column. + // We use this column to locate the metadata for the inverted index, which requires a unique_id and path. + const auto& column = _opts.tablet_schema->column(cid); + int32_t col_unique_id = + column.is_extracted_column() ? column.parent_unique_id() : column.unique_id(); RETURN_IF_ERROR(_segment->new_inverted_index_iterator( - _opts.tablet_schema->column(cid), - _segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid), - check_inverted_index_by_type), + column, + _segment->_tablet_schema->inverted_index(col_unique_id, column.suffix_path()), _opts, &_inverted_index_iterators[cid])); } } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index c532969baa4..5c702df3334 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -218,22 +218,21 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co if (_opts.write_type == DataWriteType::TYPE_DIRECT && schema->skip_write_index_on_load()) { skip_inverted_index = true; } - // indexes for this column - opts.indexes = schema->get_indexes_for_column(column); + if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) { opts.need_zone_map = false; opts.need_bloom_filter = false; opts.need_bitmap_index = false; } - opts.inverted_index_file_writer = _inverted_index_file_writer; - for (const auto* index : opts.indexes) { - if (!skip_inverted_index && index->index_type() == IndexType::INVERTED) { - opts.inverted_index = index; - opts.need_inverted_index = true; - DCHECK(_inverted_index_file_writer != nullptr); - // TODO support multiple inverted index - break; - } + + // indexes for this column + if (const auto& index = schema->inverted_index(column); + index != nullptr && !skip_inverted_index) { + opts.inverted_index = index; + opts.need_inverted_index = true; + DCHECK(_inverted_index_file_writer != nullptr); + opts.inverted_index_file_writer = _inverted_index_file_writer; + // TODO support multiple inverted index } #define CHECK_FIELD_TYPE(TYPE, type_name) \ if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 2cea4c86c09..a0fcd58bfb7 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -210,23 +210,20 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo tablet_schema->skip_write_index_on_load()) { skip_inverted_index = true; } - // indexes for this column - opts.indexes = tablet_schema->get_indexes_for_column(column); + if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) { opts.need_zone_map = false; opts.need_bloom_filter = false; opts.need_bitmap_index = false; } - for (const auto* index : opts.indexes) { - if (!skip_inverted_index && index->index_type() == IndexType::INVERTED) { - opts.inverted_index = index; - opts.need_inverted_index = true; - DCHECK(_inverted_index_file_writer != nullptr); - // TODO support multiple inverted index - break; - } + if (const auto& index = tablet_schema->inverted_index(column); + index != nullptr && !skip_inverted_index) { + opts.inverted_index = index; + opts.need_inverted_index = true; + DCHECK(_inverted_index_file_writer != nullptr); + opts.inverted_index_file_writer = _inverted_index_file_writer; + // TODO support multiple inverted index } - opts.inverted_index_file_writer = _inverted_index_file_writer; #define CHECK_FIELD_TYPE(TYPE, type_name) \ if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) { \ diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 2125b508d24..1c06f3a405a 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -81,6 +81,7 @@ #include "vec/columns/column.h" #include "vec/columns/column_nullable.h" #include "vec/common/assert_cast.h" +#include "vec/common/schema_util.h" #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/exprs/vexpr.h" @@ -1367,13 +1368,9 @@ Status SchemaChangeJob::parse_request(const SchemaChangeParams& sc_params, *sc_directly = true; return Status::OK(); } else if (column_mapping->ref_column_idx >= 0) { - const auto& column_new = new_tablet_schema->column(i); - const auto& column_old = base_tablet_schema->column(column_mapping->ref_column_idx); // index changed - if (column_new.is_bf_column() != column_old.is_bf_column() || - column_new.has_bitmap_index() != column_old.has_bitmap_index() || - new_tablet_schema->has_inverted_index(column_new) != - base_tablet_schema->has_inverted_index(column_old)) { + if (vectorized::schema_util::has_schema_index_diff( + new_tablet_schema, base_tablet_schema, i, column_mapping->ref_column_idx)) { *sc_directly = true; return Status::OK(); } diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp index 2cfa9a8e8b7..67205835b53 100644 --- a/be/src/olap/snapshot_manager.cpp +++ b/be/src/olap/snapshot_manager.cpp @@ -698,11 +698,8 @@ Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet if (tablet_schema.get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : tablet_schema.indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - auto index_id = index.index_id(); + for (const auto& index : tablet_schema.inverted_indexes()) { + auto index_id = index->index_id(); auto index_file = ref_tablet->get_segment_index_filepath( rowset_id, segment_index, index_id); auto snapshot_segment_index_file_path = diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index ed9ffaa0664..f97f153a860 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -1234,7 +1234,7 @@ std::vector<RowsetSharedPtr> Tablet::pick_candidate_rowsets_to_build_inverted_in std::shared_lock rlock(_meta_lock); auto has_alter_inverted_index = [&](RowsetSharedPtr rowset) -> bool { for (const auto& index_id : alter_index_uids) { - if (rowset->tablet_schema()->has_inverted_index_with_index_id(index_id, "")) { + if (rowset->tablet_schema()->has_inverted_index_with_index_id(index_id)) { return true; } } @@ -2611,12 +2611,9 @@ void Tablet::gc_binlogs(int64_t version) { // add binlog segment files and index files for (int64_t i = 0; i < num_segments; ++i) { wait_for_deleted_binlog_files.emplace_back(get_segment_filepath(rowset_id, i)); - for (const auto& index : this->tablet_schema()->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } + for (const auto& index : this->tablet_schema()->inverted_indexes()) { wait_for_deleted_binlog_files.emplace_back( - get_segment_index_filepath(rowset_id, i, index.index_id())); + get_segment_index_filepath(rowset_id, i, index->index_id())); } } }; @@ -2765,12 +2762,8 @@ int64_t Tablet::get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta) if (rs_meta->tablet_schema()->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - auto indices = rs_meta->tablet_schema()->indexes(); + const auto& indices = rs_meta->tablet_schema()->inverted_indexes(); for (auto& index : indices) { - // only get file_size for inverted index - if (index.index_type() != IndexType::INVERTED) { - continue; - } for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) { std::string segment_path = get_segment_path(rs_meta, seg_id); int64_t file_size = 0; @@ -2778,7 +2771,7 @@ int64_t Tablet::get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta) std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), - index.index_id(), index.get_index_suffix()); + index->index_id(), index->get_index_suffix()); auto st = fs->file_size(inverted_index_file_path, &file_size); if (!st.ok()) { file_size = 0; diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index ef48c06aa3f..b5479d34016 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -905,14 +905,13 @@ void TabletColumn::append_sparse_column(TabletColumn column) { _num_sparse_columns++; } -void TabletSchema::append_index(TabletIndex index) { +void TabletSchema::append_index(TabletIndex&& index) { _indexes.push_back(std::move(index)); } void TabletSchema::update_index(const TabletColumn& col, TabletIndex index) { int32_t col_unique_id = col.unique_id(); - const std::string& suffix_path = - col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : ""; + const std::string& suffix_path = escape_for_path_name(col.suffix_path()); for (size_t i = 0; i < _indexes.size(); i++) { for (int32_t id : _indexes[i].col_unique_ids()) { if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) { @@ -1352,28 +1351,6 @@ Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) return _cols[it->second].get(); } -std::vector<const TabletIndex*> TabletSchema::get_indexes_for_column( - const TabletColumn& col) const { - std::vector<const TabletIndex*> indexes_for_column; - // Some columns (Float, Double, JSONB ...) from the variant do not support index, but they are listed in TabltetIndex. - if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) { - return indexes_for_column; - } - int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); - const std::string& suffix_path = - col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : ""; - // TODO use more efficient impl - for (size_t i = 0; i < _indexes.size(); i++) { - for (int32_t id : _indexes[i].col_unique_ids()) { - if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) { - indexes_for_column.push_back(&(_indexes[i])); - } - } - } - - return indexes_for_column; -} - void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema, const std::vector<TColumn>& t_columns) { copy_from(tablet_schema); @@ -1385,49 +1362,17 @@ void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema, } } -bool TabletSchema::has_inverted_index(const TabletColumn& col) const { - // TODO use more efficient impl - int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); - const std::string& suffix_path = - col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : ""; +bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const { for (size_t i = 0; i < _indexes.size(); i++) { - if (_indexes[i].index_type() == IndexType::INVERTED) { - for (int32_t id : _indexes[i].col_unique_ids()) { - if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) { - return true; - } - } - } - } - - return false; -} - -bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id, - const std::string& suffix_name) const { - for (size_t i = 0; i < _indexes.size(); i++) { - if (_indexes[i].index_type() == IndexType::INVERTED && - _indexes[i].get_index_suffix() == suffix_name && _indexes[i].index_id() == index_id) { + if (_indexes[i].index_type() == IndexType::INVERTED && _indexes[i].index_id() == index_id) { return true; } } return false; } -const TabletIndex* TabletSchema::get_inverted_index_with_index_id( - int64_t index_id, const std::string& suffix_name) const { - for (size_t i = 0; i < _indexes.size(); i++) { - if (_indexes[i].index_type() == IndexType::INVERTED && - _indexes[i].get_index_suffix() == suffix_name && _indexes[i].index_id() == index_id) { - return &(_indexes[i]); - } - } - - return nullptr; -} - -const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id, - const std::string& suffix_path) const { +const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id, + const std::string& suffix_path) const { for (size_t i = 0; i < _indexes.size(); i++) { if (_indexes[i].index_type() == IndexType::INVERTED) { for (int32_t id : _indexes[i].col_unique_ids()) { @@ -1441,19 +1386,15 @@ const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id, return nullptr; } -const TabletIndex* TabletSchema::get_inverted_index(const TabletColumn& col, - bool check_valid) const { - // With check_valid set to true by default +const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const { // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index - if (check_valid && !segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) { + if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) { return nullptr; } // TODO use more efficient impl // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); - const std::string& suffix_path = - col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : ""; - return get_inverted_index(col_unique_id, suffix_path); + return inverted_index(col_unique_id, escape_for_path_name(col.suffix_path())); } bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index e2f90e2716f..f3ff0d694b4 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -163,6 +163,9 @@ public: bool is_extracted_column() const { return _column_path != nullptr && !_column_path->empty() && _parent_col_unique_id > 0; }; + std::string suffix_path() const { + return is_extracted_column() ? _column_path->get_path() : ""; + } bool is_nested_subcolumn() const { return _column_path != nullptr && _column_path->has_nested_part(); } @@ -223,13 +226,16 @@ private: bool _has_bitmap_index = false; bool _visible = true; - int32_t _parent_col_unique_id = -1; + std::vector<TabletColumnPtr> _sub_columns; uint32_t _sub_column_count = 0; bool _result_is_nullable = false; int _be_exec_version = -1; - vectorized::PathInDataPtr _column_path; + + // The extracted sub-columns from "variant" contain the following information: + int32_t _parent_col_unique_id = -1; // "variant" -> col_unique_id + vectorized::PathInDataPtr _column_path; // the path of the sub-columns themselves // Record information about columns merged into a sparse column within a variant // `{"id": 100, "name" : "jack", "point" : 3.9}` @@ -315,7 +321,7 @@ public: } void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const; void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL); - void append_index(TabletIndex index); + void append_index(TabletIndex&& index); void update_index(const TabletColumn& column, TabletIndex index); void remove_index(int64_t index_id); void clear_index(); @@ -385,7 +391,15 @@ public: void set_row_store_page_size(long page_size) { _row_store_page_size = page_size; } long row_store_page_size() const { return _row_store_page_size; } - const std::vector<TabletIndex>& indexes() const { return _indexes; } + const std::vector<const TabletIndex*> inverted_indexes() const { + std::vector<const TabletIndex*> inverted_indexes; + for (const auto& index : _indexes) { + if (index.index_type() == IndexType::INVERTED) { + inverted_indexes.emplace_back(&index); + } + } + return inverted_indexes; + } bool has_inverted_index() const { for (const auto& index : _indexes) { if (index.index_type() == IndexType::INVERTED) { @@ -394,17 +408,15 @@ public: } return false; } - std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn& col) const; - bool has_inverted_index(const TabletColumn& col) const; - bool has_inverted_index_with_index_id(int64_t index_id, const std::string& suffix_path) const; - const TabletIndex* get_inverted_index_with_index_id(int64_t index_id, - const std::string& suffix_name) const; - // check_valid: check if this column supports inverted index + bool has_inverted_index_with_index_id(int64_t index_id) const; + // Check whether this column supports inverted index // Some columns (Float, Double, JSONB ...) from the variant do not support index, but they are listed in TabletIndex. - // If returned, the index file will not be found. - const TabletIndex* get_inverted_index(const TabletColumn& col, bool check_valid = true) const; - const TabletIndex* get_inverted_index(int32_t col_unique_id, - const std::string& suffix_path) const; + const TabletIndex* inverted_index(const TabletColumn& col) const; + + // Regardless of whether this column supports inverted index + // TabletIndex information will be returned as long as it exists. + const TabletIndex* inverted_index(int32_t col_unique_id, + const std::string& suffix_path = "") const; bool has_ngram_bf_index(int32_t col_unique_id) const; const TabletIndex* get_ngram_bf_index(int32_t col_unique_id) const; void update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& indexes); diff --git a/be/src/olap/task/engine_storage_migration_task.cpp b/be/src/olap/task/engine_storage_migration_task.cpp index 21be34a334d..a300e6e0f09 100644 --- a/be/src/olap/task/engine_storage_migration_task.cpp +++ b/be/src/olap/task/engine_storage_migration_task.cpp @@ -407,11 +407,8 @@ Status EngineStorageMigrationTask::_copy_index_and_data_files( if (tablet_schema.get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : tablet_schema.indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - auto index_id = index.index_id(); + for (const auto& index : tablet_schema.inverted_indexes()) { + auto index_id = index->index_id(); auto index_file = _tablet->get_segment_index_filepath(rowset_id, segment_index, index_id); auto snapshot_segment_index_file_path = diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 69ab95770f7..975920a437e 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -106,7 +106,7 @@ Status IndexBuilder::update_inverted_index_info() { } } auto column = output_rs_tablet_schema->column(column_idx); - const auto* index_meta = output_rs_tablet_schema->get_inverted_index(column); + const auto* index_meta = output_rs_tablet_schema->inverted_index(column); if (index_meta == nullptr) { LOG(ERROR) << "failed to find column: " << column_name << " index_id: " << t_inverted_index.index_id; @@ -142,12 +142,7 @@ Status IndexBuilder::update_inverted_index_info() { return Status::Error<ErrorCode::INTERNAL_ERROR>( "indexes count cannot be negative"); } - int32_t indexes_size = 0; - for (auto index : output_rs_tablet_schema->indexes()) { - if (index.index_type() == IndexType::INVERTED) { - indexes_size++; - } - } + int32_t indexes_size = output_rs_tablet_schema->inverted_indexes().size(); if (indexes_count != indexes_size) { return Status::Error<ErrorCode::INTERNAL_ERROR>( "indexes count not equal to expected"); @@ -165,11 +160,11 @@ Status IndexBuilder::update_inverted_index_info() { LOG(WARNING) << "referenced column was missing. " << "[column=" << t_inverted_index.columns[0] << " referenced_column=" << column_uid << "]"; - output_rs_tablet_schema->append_index(index); + output_rs_tablet_schema->append_index(std::move(index)); continue; } const TabletColumn& col = output_rs_tablet_schema->column_by_uid(column_uid); - const TabletIndex* exist_index = output_rs_tablet_schema->get_inverted_index(col); + const TabletIndex* exist_index = output_rs_tablet_schema->inverted_index(col); if (exist_index && exist_index->index_id() != index.index_id()) { LOG(WARNING) << fmt::format( "column: {} has a exist inverted index, but the index id not equal " @@ -179,7 +174,7 @@ Status IndexBuilder::update_inverted_index_info() { without_index_uids.insert(exist_index->index_id()); output_rs_tablet_schema->remove_index(exist_index->index_id()); } - output_rs_tablet_schema->append_index(index); + output_rs_tablet_schema->append_index(std::move(index)); } } // construct input rowset reader @@ -399,14 +394,15 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta } } auto column = output_rowset_schema->column(column_idx); + // variant column is not support for building index if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) { continue; } - DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id, "")); + DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id)); _olap_data_convertor->add_column_data_convertor(column); return_columns.emplace_back(column_idx); std::unique_ptr<Field> field(FieldFactory::create(column)); - const auto* index_meta = output_rowset_schema->get_inverted_index(column); + const auto* index_meta = output_rowset_schema->inverted_index(column); std::unique_ptr<segment_v2::InvertedIndexColumnWriter> inverted_index_builder; try { RETURN_IF_ERROR(segment_v2::InvertedIndexColumnWriter::create( diff --git a/be/src/service/backend_service.cpp b/be/src/service/backend_service.cpp index abdef513296..d74a9cd2e0b 100644 --- a/be/src/service/backend_service.cpp +++ b/be/src/service/backend_service.cpp @@ -353,11 +353,8 @@ void _ingest_binlog(StorageEngine& engine, IngestBinlogArg* arg) { std::vector<std::string> segment_index_file_names; auto tablet_schema = rowset_meta->tablet_schema(); if (tablet_schema->get_inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { - for (const auto& index : tablet_schema->indexes()) { - if (index.index_type() != IndexType::INVERTED) { - continue; - } - auto index_id = index.index_id(); + for (const auto& index : tablet_schema->inverted_indexes()) { + auto index_id = index->index_id(); for (int64_t segment_index = 0; segment_index < num_segments; ++segment_index) { auto get_segment_index_file_size_url = fmt::format( "{}?method={}&tablet_id={}&rowset_id={}&segment_index={}&segment_index_id={" @@ -379,7 +376,7 @@ void _ingest_binlog(StorageEngine& engine, IngestBinlogArg* arg) { rowset_meta->rowset_id().to_string(), segment_index); segment_index_file_names.push_back(InvertedIndexDescriptor::get_index_file_path_v1( InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), index_id, - index.get_index_suffix())); + index->get_index_suffix())); status = HttpClient::execute_with_retry(max_retry, 1, get_segment_index_file_size_cb); diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index adaee6e9fe6..9692c02fda3 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -360,6 +360,7 @@ void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas, void inherit_column_attributes(const TabletColumn& source, TabletColumn& target, TabletSchemaSPtr& target_schema) { + DCHECK(target.is_extracted_column()); if (target.type() != FieldType::OLAP_FIELD_TYPE_TINYINT && target.type() != FieldType::OLAP_FIELD_TYPE_ARRAY && target.type() != FieldType::OLAP_FIELD_TYPE_DOUBLE && @@ -368,18 +369,18 @@ void inherit_column_attributes(const TabletColumn& source, TabletColumn& target, target.set_is_bf_column(source.is_bf_column()); } target.set_aggregation_method(source.aggregation()); - const auto* source_index_meta = target_schema->get_inverted_index(source.unique_id(), ""); + const auto* source_index_meta = target_schema->inverted_index(source.unique_id()); if (source_index_meta != nullptr) { // add index meta TabletIndex index_info = *source_index_meta; index_info.set_escaped_escaped_index_suffix_path(target.path_info_ptr()->get_path()); - // get_inverted_index: No need to check, just inherit directly - const auto* target_index_meta = target_schema->get_inverted_index(target, false); + const auto* target_index_meta = target_schema->inverted_index( + target.parent_unique_id(), target.path_info_ptr()->get_path()); if (target_index_meta != nullptr) { // already exist target_schema->update_index(target, index_info); } else { - target_schema->append_index(index_info); + target_schema->append_index(std::move(index_info)); } } } @@ -591,4 +592,20 @@ Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst) return Status::OK(); } +bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* old_schema, + int32_t new_col_idx, int32_t old_col_idx) { + const auto& column_new = new_schema->column(new_col_idx); + const auto& column_old = old_schema->column(old_col_idx); + + if (column_new.is_bf_column() != column_old.is_bf_column() || + column_new.has_bitmap_index() != column_old.has_bitmap_index()) { + return true; + } + + bool new_schema_has_inverted_index = new_schema->inverted_index(column_new); + bool old_schema_has_inverted_index = old_schema->inverted_index(column_old); + + return new_schema_has_inverted_index != old_schema_has_inverted_index; +} + } // namespace doris::vectorized::schema_util diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h index 080e6331dc1..8ceb97a9156 100644 --- a/be/src/vec/common/schema_util.h +++ b/be/src/vec/common/schema_util.h @@ -121,4 +121,7 @@ Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst); std::string dump_column(DataTypePtr type, const ColumnPtr& col); +bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* old_schema, + int32_t new_col_idx, int32_t old_col_idx); + } // namespace doris::vectorized::schema_util diff --git a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp index 922a77fcaa4..5e3370847e9 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp @@ -406,7 +406,7 @@ TEST_F(IndexCompactionTest, write_index_test) { // read col key const auto& key = _tablet_schema->column_by_uid(0); - const auto* key_index = _tablet_schema->get_inverted_index(key); + const auto* key_index = _tablet_schema->inverted_index(key); EXPECT_TRUE(key_index != nullptr); std::vector<int> query_data {99, 66, 56, 87, 85, 96, 20000}; std::vector<int> query_result {21, 25, 22, 18, 14, 18, 0}; @@ -414,7 +414,7 @@ TEST_F(IndexCompactionTest, write_index_test) { // read col v3 const auto& v3_column = _tablet_schema->column_by_uid(3); - const auto* v3_index = _tablet_schema->get_inverted_index(v3_column); + const auto* v3_index = _tablet_schema->inverted_index(v3_column); EXPECT_TRUE(v3_index != nullptr); std::vector<int> query_data3 {99, 66, 56, 87, 85, 96, 10000}; std::vector<int> query_result3 {12, 20, 25, 23, 16, 24, 0}; @@ -422,7 +422,7 @@ TEST_F(IndexCompactionTest, write_index_test) { // read col v1 const auto& v1_column = _tablet_schema->column_by_uid(1); - const auto* v1_index = _tablet_schema->get_inverted_index(v1_column); + const auto* v1_index = _tablet_schema->inverted_index(v1_column); EXPECT_TRUE(v1_index != nullptr); std::vector<std::string> query_data1 {"good", "maybe", "great", "null"}; std::vector<int> query_result1 {197, 191, 194, 0}; @@ -431,7 +431,7 @@ TEST_F(IndexCompactionTest, write_index_test) { // read col v2 const auto& v2_column = _tablet_schema->column_by_uid(2); - const auto* v2_index = _tablet_schema->get_inverted_index(v2_column); + const auto* v2_index = _tablet_schema->inverted_index(v2_column); EXPECT_TRUE(v2_index != nullptr); std::vector<std::string> query_data2 {"musicstream.com", "http", "https", "null"}; std::vector<int> query_result2 {191, 799, 1201, 0}; diff --git a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp index 4c77b71d66d..321d43fa872 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp @@ -498,7 +498,7 @@ protected: // read col key const auto& key = _tablet_schema->column_by_uid(0); - const auto* key_index = _tablet_schema->get_inverted_index(key); + const auto* key_index = _tablet_schema->inverted_index(key); EXPECT_TRUE(key_index != nullptr); std::vector<int> query_data {99, 66, 56, 87, 85, 96, 20000}; std::vector<int> query_result {19, 21, 21, 16, 14, 18, 0}; @@ -506,7 +506,7 @@ protected: // read col v3 const auto& v3_column = _tablet_schema->column_by_uid(3); - const auto* v3_index = _tablet_schema->get_inverted_index(v3_column); + const auto* v3_index = _tablet_schema->inverted_index(v3_column); EXPECT_TRUE(v3_index != nullptr); std::vector<int> query_data3 {99, 66, 56, 87, 85, 96, 10000}; std::vector<int> query_result3 {12, 18, 22, 21, 16, 20, 0}; @@ -514,7 +514,7 @@ protected: // read col v1 const auto& v1_column = _tablet_schema->column_by_uid(1); - const auto* v1_index = _tablet_schema->get_inverted_index(v1_column); + const auto* v1_index = _tablet_schema->inverted_index(v1_column); EXPECT_TRUE(v1_index != nullptr); std::vector<std::string> query_data1 {"good", "maybe", "great", "null"}; std::vector<int> query_result1 {197, 191, 0, 0}; @@ -523,7 +523,7 @@ protected: // read col v2 const auto& v2_column = _tablet_schema->column_by_uid(2); - const auto* v2_index = _tablet_schema->get_inverted_index(v2_column); + const auto* v2_index = _tablet_schema->inverted_index(v2_column); EXPECT_TRUE(v2_index != nullptr); std::vector<std::string> query_data2 {"musicstream.com", "http", "https", "null"}; std::vector<int> query_result2 {176, 719, 1087, 0}; diff --git a/be/test/olap/tablet_index_test.cpp b/be/test/olap/tablet_index_test.cpp new file mode 100644 index 00000000000..7842f9af18d --- /dev/null +++ b/be/test/olap/tablet_index_test.cpp @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest.h> + +#include "olap/tablet_schema.h" +#include "vec/common/schema_util.h" + +namespace doris { + +class TabletIndexTest : public testing::Test {}; + +void construct_column(ColumnPB* column_pb, TabletIndexPB* tablet_index, int64_t index_id, + const std::string& index_name, int32_t col_unique_id, + const std::string& column_type, const std::string& column_name, + const IndexType& index_type, bool is_bf_column) { + column_pb->set_unique_id(col_unique_id); + column_pb->set_name(column_name); + column_pb->set_type(column_type); + column_pb->set_is_nullable(true); + column_pb->set_is_bf_column(is_bf_column); + tablet_index->set_index_id(index_id); + tablet_index->set_index_name(index_name); + tablet_index->set_index_type(index_type); + tablet_index->add_col_unique_id(col_unique_id); + if (index_type == IndexType::NGRAM_BF) { + auto* properties = tablet_index->mutable_properties(); + (*properties)["gram_size"] = "5"; + (*properties)["bf_size"] = "1024"; + } +} + +TEST_F(TabletIndexTest, test_inverted_index) { + TabletSchemaPB schema_pb; + schema_pb.set_keys_type(KeysType::DUP_KEYS); + schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); + + construct_column(schema_pb.add_column(), schema_pb.add_index(), 10000, "key_index", 0, "INT", + "key", IndexType::INVERTED, true); + construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001, "v1_index", 1, "STRING", + "v1", IndexType::INVERTED, false); + construct_column(schema_pb.add_column(), schema_pb.add_index(), 10002, "v2_index", 2, "STRING", + "v2", IndexType::NGRAM_BF, true); + + TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); + tablet_schema->init_from_pb(schema_pb); + + EXPECT_TRUE(tablet_schema->has_inverted_index()); + EXPECT_EQ(tablet_schema->inverted_indexes().size(), 2); + EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(0)) != nullptr); + EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(1)) != nullptr); + EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(2)) == nullptr); + EXPECT_TRUE(tablet_schema->inverted_index(3) == nullptr); + EXPECT_TRUE(tablet_schema->inverted_index(4, "v1.a") == nullptr); +} + +TEST_F(TabletIndexTest, test_schema_index_diff) { + TabletSchemaPB new_schema_pb; + new_schema_pb.set_keys_type(KeysType::DUP_KEYS); + new_schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); + + construct_column(new_schema_pb.add_column(), new_schema_pb.add_index(), 10000, "key_index", 0, + "INT", "key", IndexType::INVERTED, true); + construct_column(new_schema_pb.add_column(), new_schema_pb.add_index(), 10001, "v1_index", 1, + "STRING", "v1", IndexType::INVERTED, false); + construct_column(new_schema_pb.add_column(), new_schema_pb.add_index(), 10002, "v2_index", 2, + "STRING", "v2", IndexType::NGRAM_BF, true); + + TabletSchemaSPtr new_tablet_schema = std::make_shared<TabletSchema>(); + new_tablet_schema->init_from_pb(new_schema_pb); + + TabletSchemaPB old_schema_pb; + old_schema_pb.set_keys_type(KeysType::DUP_KEYS); + old_schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); + + construct_column(old_schema_pb.add_column(), old_schema_pb.add_index(), 10000, "key_index", 0, + "INT", "key", IndexType::INVERTED, true); + construct_column(old_schema_pb.add_column(), old_schema_pb.add_index(), 10001, "v1_index", 1, + "STRING", "v1", IndexType::INVERTED, true); + construct_column(old_schema_pb.add_column(), old_schema_pb.add_index(), 10002, "v2_index", 2, + "STRING", "v2", IndexType::INVERTED, true); + + TabletSchemaSPtr old_tablet_schema = std::make_shared<TabletSchema>(); + old_tablet_schema->init_from_pb(old_schema_pb); + + EXPECT_FALSE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(), + old_tablet_schema.get(), 0, 0)); + EXPECT_TRUE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(), + old_tablet_schema.get(), 1, 1)); + EXPECT_TRUE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(), + old_tablet_schema.get(), 2, 2)); +} + +} // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org