qidaye commented on code in PR #30145: URL: https://github.com/apache/doris/pull/30145#discussion_r1500173120
########## be/src/olap/compaction.cpp: ########## @@ -490,29 +492,70 @@ Status CompactionMixin::do_inverted_index_compaction() { return Status::OK(); } - // src index files + // create index_writer to compaction indexes + const auto& fs = _output_rowset->rowset_meta()->fs(); + const auto& tablet_path = _tablet->tablet_path(); + + // src index dirs // format: rowsetId_segmentId - std::vector<std::string> src_index_files(src_segment_num); + std::vector<std::unique_ptr<InvertedIndexFileReader>> inverted_index_file_readers( + src_segment_num); for (const auto& m : src_seg_to_id_map) { std::pair<RowsetId, uint32_t> p = m.first; - src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second); + auto segment_file_name = p.first.to_string() + "_" + std::to_string(p.second) + ".dat"; + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, tablet_path, segment_file_name, + _cur_tablet_schema->get_inverted_index_storage_format()); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, + open_idx_file_cache); + if (!st.ok()) { + LOG(ERROR) << "init inverted index " + << InvertedIndexDescriptor::get_index_file_name(segment_file_name) + << " failed in compaction when init inverted index file reader"; + return st; + } + inverted_index_file_readers[m.second] = std::move(inverted_index_file_reader); } // dest index files // format: rowsetId_segmentId - std::vector<std::string> dest_index_files(dest_segment_num); + std::vector<std::unique_ptr<InvertedIndexFileWriter>> inverted_index_file_writers( + dest_segment_num); for (int i = 0; i < dest_segment_num; ++i) { - auto prefix = dest_rowset_id.to_string() + "_" + std::to_string(i); - dest_index_files[i] = prefix; + auto prefix = dest_rowset_id.to_string() + "_" + std::to_string(i) + ".dat"; + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, tablet_path, prefix, _cur_tablet_schema->get_inverted_index_storage_format()); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, + open_idx_file_cache); + if (st.ok()) { Review Comment: Add some comments to explain the logic. ########## be/src/olap/compaction.cpp: ########## @@ -582,53 +640,54 @@ void CompactionMixin::construct_skip_inverted_index(RowsetWriterContext& ctx) { for (auto i = 0; i < rowset->num_segments(); i++) { auto segment_file = rowset->segment_file_path(i); - std::string inverted_index_src_file_path = - InvertedIndexDescriptor::get_index_file_name( - segment_file, index_meta->index_id(), - index_meta->get_index_suffix()); + io::Path segment_path(segment_file); + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, segment_path.parent_path(), segment_path.filename(), + _cur_tablet_schema->get_inverted_index_storage_format()); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, + open_idx_file_cache); + if (!st.ok()) { + LOG(WARNING) << "init index " + << inverted_index_file_reader->get_index_file_path(index_meta) + << " error:" << st; + return false; + } + bool exists = false; - if (!fs->exists(inverted_index_src_file_path, &exists).ok()) { - LOG(ERROR) << inverted_index_src_file_path << " fs->exists error"; + if (!inverted_index_file_reader->index_file_exist(index_meta, &exists).ok()) { + LOG(ERROR) << inverted_index_file_reader->get_index_file_path(index_meta) + << " fs->exists error"; return false; } if (!exists) { LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" - << col_unique_id << "]," << inverted_index_src_file_path + << col_unique_id << "]," + << inverted_index_file_reader->get_index_file_path(index_meta) << " is not exists, will skip index compaction"; return false; } - // check idx file size Review Comment: Why remove this check? ########## be/src/olap/tablet_meta.cpp: ########## @@ -168,6 +170,18 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id break; } + switch (inverted_index_storage_format) { + case TInvertedIndexStorageFormat::V1: + schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1); + break; + case TInvertedIndexStorageFormat::V2: + schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); + break; + default: + schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); + break; Review Comment: Maybe merge to one break? ########## gensrc/thrift/AgentService.thrift: ########## @@ -151,7 +159,7 @@ struct TCreateTabletReq { 24: optional i64 time_series_compaction_file_count_threshold = 2000 25: optional i64 time_series_compaction_time_threshold_seconds = 3600 26: optional i64 time_series_compaction_empty_rowsets_threshold = 5 - + 27: optional TInvertedIndexStorageFormat inverted_index_storage_format Review Comment: Default value? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org