This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit e9d65bb4320972fcc2ef1c1f973b21c54b4cf414
Author: YueW <45946325+tany...@users.noreply.github.com>
AuthorDate: Sun Jul 9 16:45:27 2023 +0800

    [fix](inverted index) use index id instead of column uid to determine 
whether a hard link is required when build index (#21574)
    
    Fix problem:
    For the same column, there are concurrent drop index request and build 
index request, if build index obtain lock before drop index, build a new index 
file, but when drop index request execute, link file not contains all index 
files for the column, that lead to new index file is missed.
    
    Based on the above questions, use index id instead of column unique id to 
determine whether a hard link is required when do build index
---
 be/src/common/status.h             |  2 ++
 be/src/olap/rowset/beta_rowset.cpp | 64 ++++++++++++++++++--------------------
 be/src/olap/rowset/beta_rowset.h   |  2 +-
 be/src/olap/rowset/rowset.h        |  2 +-
 be/src/olap/task/index_builder.cpp | 42 ++++++++-----------------
 be/src/olap/task/index_builder.h   |  3 --
 be/test/testutil/mock_rowset.h     |  2 +-
 7 files changed, 48 insertions(+), 69 deletions(-)

diff --git a/be/src/common/status.h b/be/src/common/status.h
index 538b7de24f..45f9286933 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -266,6 +266,7 @@ E(INVERTED_INDEX_BYPASS, -6004);
 E(INVERTED_INDEX_NO_TERMS, -6005);
 E(INVERTED_INDEX_RENAME_FILE_FAILED, -6006);
 E(INVERTED_INDEX_EVALUATE_SKIPPED, -6007);
+E(INVERTED_INDEX_BUILD_WAITTING, -6008);
 #undef E
 } // namespace ErrorCode
 
@@ -296,6 +297,7 @@ constexpr bool capture_stacktrace() {
         && code != ErrorCode::INVERTED_INDEX_BYPASS
         && code != ErrorCode::INVERTED_INDEX_NO_TERMS
         && code != ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED
+        && code != ErrorCode::INVERTED_INDEX_BUILD_WAITTING
         && code != ErrorCode::META_KEY_NOT_FOUND
         && code != ErrorCode::PUSH_VERSION_ALREADY_EXIST
         && code != ErrorCode::TABLE_ALREADY_DELETED_ERROR
diff --git a/be/src/olap/rowset/beta_rowset.cpp 
b/be/src/olap/rowset/beta_rowset.cpp
index dd3073b0cd..d69621cf92 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -221,7 +221,7 @@ void BetaRowset::do_close() {
 
 Status BetaRowset::link_files_to(const std::string& dir, RowsetId 
new_rowset_id,
                                  size_t new_rowset_start_seg_id,
-                                 std::set<int32_t>* without_index_column_uids) 
{
+                                 std::set<int32_t>* without_index_uids) {
     DCHECK(is_local());
     auto fs = _rowset_meta->fs();
     if (!fs) {
@@ -246,45 +246,41 @@ Status BetaRowset::link_files_to(const std::string& dir, 
RowsetId new_rowset_id,
                          << "to=" << dst_path << ", errno=" << Errno::no();
             return Status::Error<OS_ERROR>();
         }
-        for (auto& column : _schema->columns()) {
-            if (without_index_column_uids != nullptr &&
-                without_index_column_uids->count(column.unique_id())) {
+        for (auto& index : _schema->indexes()) {
+            if (index.index_type() != IndexType::INVERTED) {
                 continue;
             }
-            const TabletIndex* index_meta = 
_schema->get_inverted_index(column.unique_id());
-            if (index_meta) {
-                std::string inverted_index_src_file_path =
-                        InvertedIndexDescriptor::get_index_file_name(src_path,
-                                                                     
index_meta->index_id());
-                std::string inverted_index_dst_file_path =
-                        InvertedIndexDescriptor::get_index_file_name(dst_path,
-                                                                     
index_meta->index_id());
 
-                bool need_to_link = true;
-                if (_schema->skip_write_index_on_load()) {
-                    local_fs->exists(inverted_index_src_file_path, 
&need_to_link);
-                    if (!need_to_link) {
-                        LOG(INFO) << "skip create hard link to not existed 
file="
-                                  << inverted_index_src_file_path;
-                    }
-                }
+            auto index_id = index.index_id();
+            if (without_index_uids != nullptr && 
without_index_uids->count(index_id)) {
+                continue;
+            }
 
-                if (need_to_link) {
-                    if (!local_fs->link_file(inverted_index_src_file_path,
-                                             inverted_index_dst_file_path)
-                                 .ok()) {
-                        LOG(WARNING)
-                                << "fail to create hard link. from=" << 
inverted_index_src_file_path
-                                << ", "
-                                << "to=" << inverted_index_dst_file_path
-                                << ", errno=" << Errno::no();
-                        return Status::Error<OS_ERROR>();
-                    }
-                    LOG(INFO) << "success to create hard link. from="
-                              << inverted_index_src_file_path << ", "
-                              << "to=" << inverted_index_dst_file_path;
+            std::string inverted_index_src_file_path =
+                    InvertedIndexDescriptor::get_index_file_name(src_path, 
index_id);
+            std::string inverted_index_dst_file_path =
+                    InvertedIndexDescriptor::get_index_file_name(dst_path, 
index_id);
+            bool need_to_link = true;
+            if (_schema->skip_write_index_on_load()) {
+                local_fs->exists(inverted_index_src_file_path, &need_to_link);
+                if (!need_to_link) {
+                    LOG(INFO) << "skip create hard link to not existed file="
+                              << inverted_index_src_file_path;
                 }
             }
+            if (need_to_link) {
+                if (!local_fs->link_file(inverted_index_src_file_path, 
inverted_index_dst_file_path)
+                             .ok()) {
+                    LOG(WARNING) << "fail to create hard link. from="
+                                 << inverted_index_src_file_path << ", "
+                                 << "to=" << inverted_index_dst_file_path
+                                 << ", errno=" << Errno::no();
+                    return Status::Error<OS_ERROR>();
+                }
+                LOG(INFO) << "success to create hard link. from=" << 
inverted_index_src_file_path
+                          << ", "
+                          << "to=" << inverted_index_dst_file_path;
+            }
         }
     }
     return Status::OK();
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index 643782c461..064b8fcb6e 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -72,7 +72,7 @@ public:
 
     Status link_files_to(const std::string& dir, RowsetId new_rowset_id,
                          size_t new_rowset_start_seg_id = 0,
-                         std::set<int32_t>* without_index_column_uids = 
nullptr) override;
+                         std::set<int32_t>* without_index_uids = nullptr) 
override;
 
     Status copy_files_to(const std::string& dir, const RowsetId& 
new_rowset_id) override;
 
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index 8829cc0770..d3a0c10447 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -203,7 +203,7 @@ public:
     // hard link all files in this rowset to `dir` to form a new rowset with 
id `new_rowset_id`.
     virtual Status link_files_to(const std::string& dir, RowsetId 
new_rowset_id,
                                  size_t new_rowset_start_seg_id = 0,
-                                 std::set<int32_t>* without_index_column_uids 
= nullptr) = 0;
+                                 std::set<int32_t>* without_index_uids = 
nullptr) = 0;
 
     // copy all files to `dir`
     virtual Status copy_files_to(const std::string& dir, const RowsetId& 
new_rowset_id) = 0;
diff --git a/be/src/olap/task/index_builder.cpp 
b/be/src/olap/task/index_builder.cpp
index 1022daa113..1dc1d5294a 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -74,6 +74,18 @@ Status IndexBuilder::update_inverted_index_info() {
             for (auto t_inverted_index : _alter_inverted_indexes) {
                 TabletIndex index;
                 index.init_from_thrift(t_inverted_index, 
*input_rs_tablet_schema);
+                auto column_uid = index.col_unique_ids()[0];
+                const TabletIndex* exist_index =
+                        
output_rs_tablet_schema->get_inverted_index(column_uid);
+                if (exist_index && exist_index->index_id() != 
index.index_id()) {
+                    // maybe there are concurrent drop index request did not 
obtain the lock,
+                    // so return error, to wait the drop index request 
finished.
+                    LOG(WARNING) << "column: " << column_uid << " has a exist 
inverted index"
+                                 << ", but the index id not equal request's 
index id, "
+                                 << ", exist index id: " << 
exist_index->index_id()
+                                 << ", request's index id: " << 
index.index_id();
+                    return 
Status::Error<ErrorCode::INVERTED_INDEX_BUILD_WAITTING>();
+                }
                 output_rs_tablet_schema->append_index(std::move(index));
             }
         }
@@ -94,11 +106,9 @@ Status IndexBuilder::update_inverted_index_info() {
         if (!status.ok()) {
             return Status::Error<ErrorCode::ROWSET_BUILDER_INIT>();
         }
-        std::set<int32_t> alter_column_ids_set =
-                
_rowset_alter_index_column_ids[input_rowset->rowset_id().to_string()];
         RETURN_IF_ERROR(input_rowset->link_files_to(_tablet->tablet_path(),
                                                     
output_rs_writer->rowset_id(), 0,
-                                                    &alter_column_ids_set)); 
// build output rowset
+                                                    &_alter_index_ids)); // 
build output rowset
 
         auto input_rowset_meta = input_rowset->rowset_meta();
         RowsetMetaSharedPtr rowset_meta = std::make_shared<RowsetMeta>();
@@ -402,8 +412,6 @@ Status IndexBuilder::do_build_inverted_index() {
         return Status::OK();
     }
 
-    _calc_alter_column_ids();
-
     auto st = update_inverted_index_info();
     if (!st.ok()) {
         LOG(WARNING) << "failed to update_inverted_index_info. "
@@ -473,28 +481,4 @@ void IndexBuilder::gc_output_rowset() {
     }
 }
 
-Status IndexBuilder::_calc_alter_column_ids() {
-    for (auto& rs : _input_rowsets) {
-        RowsetId rowset_id = rs->rowset_id();
-        auto rs_tablet_schema = rs->tablet_schema();
-        std::set<int32_t> alter_column_uids;
-        for (auto inverted_index : _alter_inverted_indexes) {
-            auto column_name = inverted_index.columns[0];
-            auto column_idx = rs_tablet_schema->field_index(column_name);
-            if (column_idx < 0) {
-                LOG(WARNING) << "referenced column was missing. "
-                             << "[column=" << column_name << " 
referenced_column=" << column_idx
-                             << "]";
-                continue;
-            }
-            auto column = rs_tablet_schema->column(column_idx);
-            alter_column_uids.insert(column.unique_id());
-        }
-        _rowset_alter_index_column_ids.insert(
-                std::make_pair(rowset_id.to_string(), alter_column_uids));
-    }
-
-    return Status::OK();
-}
-
 } // namespace doris
diff --git a/be/src/olap/task/index_builder.h b/be/src/olap/task/index_builder.h
index 9e406c22c1..2ff266f5a7 100644
--- a/be/src/olap/task/index_builder.h
+++ b/be/src/olap/task/index_builder.h
@@ -59,14 +59,11 @@ private:
                          const std::pair<int64_t, int64_t>& index_writer_sign, 
Field* field,
                          const uint8_t* null_map, const uint8_t** ptr, size_t 
num_rows);
 
-    Status _calc_alter_column_ids();
-
 private:
     TabletSharedPtr _tablet;
     std::vector<TColumn> _columns;
     std::vector<doris::TOlapTableIndex> _alter_inverted_indexes;
     bool _is_drop_op;
-    std::unordered_map<std::string, std::set<int32_t>> 
_rowset_alter_index_column_ids;
     std::set<int32_t> _alter_index_ids;
     std::vector<RowsetSharedPtr> _input_rowsets;
     std::vector<RowsetSharedPtr> _output_rowsets;
diff --git a/be/test/testutil/mock_rowset.h b/be/test/testutil/mock_rowset.h
index a16a37e3cd..93b76d9c67 100644
--- a/be/test/testutil/mock_rowset.h
+++ b/be/test/testutil/mock_rowset.h
@@ -30,7 +30,7 @@ class MockRowset : public Rowset {
     Status remove() override { return Status::NotSupported("MockRowset not 
support this method."); }
 
     Status link_files_to(const std::string& dir, RowsetId new_rowset_id, 
size_t start_seg_id,
-                         std::set<int32_t>* without_index_column_uids) 
override {
+                         std::set<int32_t>* without_index_uids) override {
         return Status::NotSupported("MockRowset not support this method.");
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to