This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit e9d65bb4320972fcc2ef1c1f973b21c54b4cf414 Author: YueW <45946325+tany...@users.noreply.github.com> AuthorDate: Sun Jul 9 16:45:27 2023 +0800 [fix](inverted index) use index id instead of column uid to determine whether a hard link is required when build index (#21574) Fix problem: For the same column, there are concurrent drop index request and build index request, if build index obtain lock before drop index, build a new index file, but when drop index request execute, link file not contains all index files for the column, that lead to new index file is missed. Based on the above questions, use index id instead of column unique id to determine whether a hard link is required when do build index --- be/src/common/status.h | 2 ++ be/src/olap/rowset/beta_rowset.cpp | 64 ++++++++++++++++++-------------------- be/src/olap/rowset/beta_rowset.h | 2 +- be/src/olap/rowset/rowset.h | 2 +- be/src/olap/task/index_builder.cpp | 42 ++++++++----------------- be/src/olap/task/index_builder.h | 3 -- be/test/testutil/mock_rowset.h | 2 +- 7 files changed, 48 insertions(+), 69 deletions(-) diff --git a/be/src/common/status.h b/be/src/common/status.h index 538b7de24f..45f9286933 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -266,6 +266,7 @@ E(INVERTED_INDEX_BYPASS, -6004); E(INVERTED_INDEX_NO_TERMS, -6005); E(INVERTED_INDEX_RENAME_FILE_FAILED, -6006); E(INVERTED_INDEX_EVALUATE_SKIPPED, -6007); +E(INVERTED_INDEX_BUILD_WAITTING, -6008); #undef E } // namespace ErrorCode @@ -296,6 +297,7 @@ constexpr bool capture_stacktrace() { && code != ErrorCode::INVERTED_INDEX_BYPASS && code != ErrorCode::INVERTED_INDEX_NO_TERMS && code != ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED + && code != ErrorCode::INVERTED_INDEX_BUILD_WAITTING && code != ErrorCode::META_KEY_NOT_FOUND && code != ErrorCode::PUSH_VERSION_ALREADY_EXIST && code != ErrorCode::TABLE_ALREADY_DELETED_ERROR diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index dd3073b0cd..d69621cf92 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -221,7 +221,7 @@ void BetaRowset::do_close() { Status BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset_id, size_t new_rowset_start_seg_id, - std::set<int32_t>* without_index_column_uids) { + std::set<int32_t>* without_index_uids) { DCHECK(is_local()); auto fs = _rowset_meta->fs(); if (!fs) { @@ -246,45 +246,41 @@ Status BetaRowset::link_files_to(const std::string& dir, RowsetId new_rowset_id, << "to=" << dst_path << ", errno=" << Errno::no(); return Status::Error<OS_ERROR>(); } - for (auto& column : _schema->columns()) { - if (without_index_column_uids != nullptr && - without_index_column_uids->count(column.unique_id())) { + for (auto& index : _schema->indexes()) { + if (index.index_type() != IndexType::INVERTED) { continue; } - const TabletIndex* index_meta = _schema->get_inverted_index(column.unique_id()); - if (index_meta) { - std::string inverted_index_src_file_path = - InvertedIndexDescriptor::get_index_file_name(src_path, - index_meta->index_id()); - std::string inverted_index_dst_file_path = - InvertedIndexDescriptor::get_index_file_name(dst_path, - index_meta->index_id()); - bool need_to_link = true; - if (_schema->skip_write_index_on_load()) { - local_fs->exists(inverted_index_src_file_path, &need_to_link); - if (!need_to_link) { - LOG(INFO) << "skip create hard link to not existed file=" - << inverted_index_src_file_path; - } - } + auto index_id = index.index_id(); + if (without_index_uids != nullptr && without_index_uids->count(index_id)) { + continue; + } - if (need_to_link) { - if (!local_fs->link_file(inverted_index_src_file_path, - inverted_index_dst_file_path) - .ok()) { - LOG(WARNING) - << "fail to create hard link. from=" << inverted_index_src_file_path - << ", " - << "to=" << inverted_index_dst_file_path - << ", errno=" << Errno::no(); - return Status::Error<OS_ERROR>(); - } - LOG(INFO) << "success to create hard link. from=" - << inverted_index_src_file_path << ", " - << "to=" << inverted_index_dst_file_path; + std::string inverted_index_src_file_path = + InvertedIndexDescriptor::get_index_file_name(src_path, index_id); + std::string inverted_index_dst_file_path = + InvertedIndexDescriptor::get_index_file_name(dst_path, index_id); + bool need_to_link = true; + if (_schema->skip_write_index_on_load()) { + local_fs->exists(inverted_index_src_file_path, &need_to_link); + if (!need_to_link) { + LOG(INFO) << "skip create hard link to not existed file=" + << inverted_index_src_file_path; } } + if (need_to_link) { + if (!local_fs->link_file(inverted_index_src_file_path, inverted_index_dst_file_path) + .ok()) { + LOG(WARNING) << "fail to create hard link. from=" + << inverted_index_src_file_path << ", " + << "to=" << inverted_index_dst_file_path + << ", errno=" << Errno::no(); + return Status::Error<OS_ERROR>(); + } + LOG(INFO) << "success to create hard link. from=" << inverted_index_src_file_path + << ", " + << "to=" << inverted_index_dst_file_path; + } } } return Status::OK(); diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index 643782c461..064b8fcb6e 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -72,7 +72,7 @@ public: Status link_files_to(const std::string& dir, RowsetId new_rowset_id, size_t new_rowset_start_seg_id = 0, - std::set<int32_t>* without_index_column_uids = nullptr) override; + std::set<int32_t>* without_index_uids = nullptr) override; Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) override; diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h index 8829cc0770..d3a0c10447 100644 --- a/be/src/olap/rowset/rowset.h +++ b/be/src/olap/rowset/rowset.h @@ -203,7 +203,7 @@ public: // hard link all files in this rowset to `dir` to form a new rowset with id `new_rowset_id`. virtual Status link_files_to(const std::string& dir, RowsetId new_rowset_id, size_t new_rowset_start_seg_id = 0, - std::set<int32_t>* without_index_column_uids = nullptr) = 0; + std::set<int32_t>* without_index_uids = nullptr) = 0; // copy all files to `dir` virtual Status copy_files_to(const std::string& dir, const RowsetId& new_rowset_id) = 0; diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 1022daa113..1dc1d5294a 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -74,6 +74,18 @@ Status IndexBuilder::update_inverted_index_info() { for (auto t_inverted_index : _alter_inverted_indexes) { TabletIndex index; index.init_from_thrift(t_inverted_index, *input_rs_tablet_schema); + auto column_uid = index.col_unique_ids()[0]; + const TabletIndex* exist_index = + output_rs_tablet_schema->get_inverted_index(column_uid); + if (exist_index && exist_index->index_id() != index.index_id()) { + // maybe there are concurrent drop index request did not obtain the lock, + // so return error, to wait the drop index request finished. + LOG(WARNING) << "column: " << column_uid << " has a exist inverted index" + << ", but the index id not equal request's index id, " + << ", exist index id: " << exist_index->index_id() + << ", request's index id: " << index.index_id(); + return Status::Error<ErrorCode::INVERTED_INDEX_BUILD_WAITTING>(); + } output_rs_tablet_schema->append_index(std::move(index)); } } @@ -94,11 +106,9 @@ Status IndexBuilder::update_inverted_index_info() { if (!status.ok()) { return Status::Error<ErrorCode::ROWSET_BUILDER_INIT>(); } - std::set<int32_t> alter_column_ids_set = - _rowset_alter_index_column_ids[input_rowset->rowset_id().to_string()]; RETURN_IF_ERROR(input_rowset->link_files_to(_tablet->tablet_path(), output_rs_writer->rowset_id(), 0, - &alter_column_ids_set)); // build output rowset + &_alter_index_ids)); // build output rowset auto input_rowset_meta = input_rowset->rowset_meta(); RowsetMetaSharedPtr rowset_meta = std::make_shared<RowsetMeta>(); @@ -402,8 +412,6 @@ Status IndexBuilder::do_build_inverted_index() { return Status::OK(); } - _calc_alter_column_ids(); - auto st = update_inverted_index_info(); if (!st.ok()) { LOG(WARNING) << "failed to update_inverted_index_info. " @@ -473,28 +481,4 @@ void IndexBuilder::gc_output_rowset() { } } -Status IndexBuilder::_calc_alter_column_ids() { - for (auto& rs : _input_rowsets) { - RowsetId rowset_id = rs->rowset_id(); - auto rs_tablet_schema = rs->tablet_schema(); - std::set<int32_t> alter_column_uids; - for (auto inverted_index : _alter_inverted_indexes) { - auto column_name = inverted_index.columns[0]; - auto column_idx = rs_tablet_schema->field_index(column_name); - if (column_idx < 0) { - LOG(WARNING) << "referenced column was missing. " - << "[column=" << column_name << " referenced_column=" << column_idx - << "]"; - continue; - } - auto column = rs_tablet_schema->column(column_idx); - alter_column_uids.insert(column.unique_id()); - } - _rowset_alter_index_column_ids.insert( - std::make_pair(rowset_id.to_string(), alter_column_uids)); - } - - return Status::OK(); -} - } // namespace doris diff --git a/be/src/olap/task/index_builder.h b/be/src/olap/task/index_builder.h index 9e406c22c1..2ff266f5a7 100644 --- a/be/src/olap/task/index_builder.h +++ b/be/src/olap/task/index_builder.h @@ -59,14 +59,11 @@ private: const std::pair<int64_t, int64_t>& index_writer_sign, Field* field, const uint8_t* null_map, const uint8_t** ptr, size_t num_rows); - Status _calc_alter_column_ids(); - private: TabletSharedPtr _tablet; std::vector<TColumn> _columns; std::vector<doris::TOlapTableIndex> _alter_inverted_indexes; bool _is_drop_op; - std::unordered_map<std::string, std::set<int32_t>> _rowset_alter_index_column_ids; std::set<int32_t> _alter_index_ids; std::vector<RowsetSharedPtr> _input_rowsets; std::vector<RowsetSharedPtr> _output_rowsets; diff --git a/be/test/testutil/mock_rowset.h b/be/test/testutil/mock_rowset.h index a16a37e3cd..93b76d9c67 100644 --- a/be/test/testutil/mock_rowset.h +++ b/be/test/testutil/mock_rowset.h @@ -30,7 +30,7 @@ class MockRowset : public Rowset { Status remove() override { return Status::NotSupported("MockRowset not support this method."); } Status link_files_to(const std::string& dir, RowsetId new_rowset_id, size_t start_seg_id, - std::set<int32_t>* without_index_column_uids) override { + std::set<int32_t>* without_index_uids) override { return Status::NotSupported("MockRowset not support this method."); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org