xiaokang commented on code in PR #30145: URL: https://github.com/apache/doris/pull/30145#discussion_r1501942443
########## be/src/olap/task/index_builder.cpp: ########## @@ -67,39 +67,34 @@ Status IndexBuilder::update_inverted_index_info() { TabletSchemaSPtr output_rs_tablet_schema = std::make_shared<TabletSchema>(); const auto& input_rs_tablet_schema = input_rowset->tablet_schema(); output_rs_tablet_schema->copy_from(*input_rs_tablet_schema); + size_t total_index_size = 0; + auto* beta_rowset = reinterpret_cast<BetaRowset*>(input_rowset.get()); + auto size_st = beta_rowset->get_inverted_index_size(&total_index_size); + if (!size_st.ok() && !size_st.is<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>() && + !size_st.is<ErrorCode::NOT_FOUND>()) { + return size_st; + } if (_is_drop_op) { - size_t total_index_size = 0; for (const auto& t_inverted_index : _alter_inverted_indexes) { - auto* beta_rowset = reinterpret_cast<BetaRowset*>(input_rowset.get()); - size_t index_size = 0; - RETURN_IF_ERROR(beta_rowset->get_inverted_index_size_by_index_id( - t_inverted_index.index_id, &index_size)); - total_index_size += index_size; + DCHECK_EQ(t_inverted_index.columns.size(), 1); + auto column_name = t_inverted_index.columns[0]; + auto column_idx = output_rs_tablet_schema->field_index(column_name); + if (column_idx < 0) { + LOG(WARNING) << "referenced column was missing. " + << "[column=" << column_name << " referenced_column=" << column_idx + << "]"; + continue; + } + auto column = output_rs_tablet_schema->column(column_idx); + const auto* index_meta = output_rs_tablet_schema->get_inverted_index(column); + if (index_meta == nullptr) { + LOG(ERROR) << "failed to find column: " << column_name + << " index_id: " << t_inverted_index.index_id; + continue; + } + _drop_indices_meta.push_back(*index_meta); output_rs_tablet_schema->remove_index(t_inverted_index.index_id); } - - auto input_rowset_meta = input_rowset->rowset_meta(); - auto update_disk_size = [&](size_t& disk_size, const std::string& size_type) { - if (disk_size >= total_index_size) { - disk_size -= total_index_size; - } else { - LOG(WARNING) << "rowset " << input_rowset_meta->rowset_id() << " " << size_type - << " size:" << disk_size - << " is less than index size:" << total_index_size; - } - }; - - size_t before_size = input_rowset_meta->total_disk_size(); - update_disk_size(before_size, "total disk"); - input_rowset_meta->set_total_disk_size(before_size); Review Comment: mark: why delete ########## be/src/olap/task/index_builder.cpp: ########## @@ -175,7 +187,52 @@ Status IndexBuilder::update_inverted_index_info() { Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta, std::vector<segment_v2::SegmentSharedPtr>& segments) { if (_is_drop_op) { - // delete invertd index file by gc thread when gc input rowset + std::string segment_dir = _tablet->tablet_path(); Review Comment: why not the same as before? ########## be/src/olap/task/index_builder.cpp: ########## @@ -189,6 +246,18 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta std::vector<ColumnId> return_columns; std::vector<std::pair<int64_t, int64_t>> inverted_index_writer_signs; _olap_data_convertor->reserve(_alter_inverted_indexes.size()); + auto idx_file_reader_iter = _inverted_index_file_readers.find( + std::make_pair(output_rowset_meta->rowset_id().to_string(), seg_ptr->id())); + if (idx_file_reader_iter == _inverted_index_file_readers.end()) { + LOG(ERROR) << "idx_file_reader_iter" << output_rowset_meta->rowset_id() << ":" + << seg_ptr->id() << " cannot be found"; + continue; + } + auto dirs = DORIS_TRY(idx_file_reader_iter->second->get_all_directories()); + auto inverted_index_file_writer = std::make_unique<InvertedIndexFileWriter>( + fs, segment_dir, segment_filename, + output_rowset_schema->get_inverted_index_storage_format()); + RETURN_IF_ERROR(inverted_index_file_writer->initialize(dirs)); Review Comment: why initialize writer with reader dirs? ########## be/src/olap/task/index_builder.cpp: ########## @@ -189,6 +246,18 @@ Status IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta std::vector<ColumnId> return_columns; std::vector<std::pair<int64_t, int64_t>> inverted_index_writer_signs; _olap_data_convertor->reserve(_alter_inverted_indexes.size()); + auto idx_file_reader_iter = _inverted_index_file_readers.find( + std::make_pair(output_rowset_meta->rowset_id().to_string(), seg_ptr->id())); + if (idx_file_reader_iter == _inverted_index_file_readers.end()) { + LOG(ERROR) << "idx_file_reader_iter" << output_rowset_meta->rowset_id() << ":" + << seg_ptr->id() << " cannot be found"; + continue; + } + auto dirs = DORIS_TRY(idx_file_reader_iter->second->get_all_directories()); + auto inverted_index_file_writer = std::make_unique<InvertedIndexFileWriter>( + fs, segment_dir, segment_filename, + output_rowset_schema->get_inverted_index_storage_format()); + RETURN_IF_ERROR(inverted_index_file_writer->initialize(dirs)); Review Comment: It's straightforward to init writers by index_meta as in normal writh path. ########## be/src/olap/task/index_builder.cpp: ########## @@ -137,6 +132,23 @@ Status IndexBuilder::update_inverted_index_info() { auto output_rs_writer = DORIS_TRY(_tablet->create_rowset_writer(context, false)); _pending_rs_guards.push_back(_engine.add_pending_rowset(context)); + for (int seg_id = 0; seg_id < num_segments; seg_id++) { + auto segment_full_path = io::Path( + static_cast<BetaRowset*>(input_rowset.get())->segment_file_path(seg_id)); + std::string segment_filename = segment_full_path.filename().native(); + auto segment_dir = segment_full_path.parent_path(); + auto idx_file_reader = std::make_unique<InvertedIndexFileReader>( + context.fs, segment_dir, segment_filename, + output_rs_tablet_schema->get_inverted_index_storage_format()); + auto st = idx_file_reader->init(); + if (!st.ok() && !st.is<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>()) { + return st; + } + _inverted_index_file_readers.emplace( Review Comment: mark: why need index reader? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org