hust-hhb commented on code in PR #50062:
URL: https://github.com/apache/doris/pull/50062#discussion_r2076731762


##########
cloud/src/recycler/checker.cpp:
##########
@@ -1189,4 +1195,75 @@ int 
InstanceChecker::do_delete_bitmap_storage_optimize_check() {
     return (failed_tablets_num > 0) ? 1 : 0;
 }
 
+int InstanceChecker::do_mow_compaction_key_check() {
+    std::unique_ptr<RangeGetIterator> it;
+    std::string begin = mow_tablet_compaction_key({instance_id_, 0, 0});
+    std::string end = mow_tablet_compaction_key({instance_id_, INT64_MAX, 0});
+    MowTabletCompactionPB mow_tablet_compaction;
+    do {
+        std::unique_ptr<Transaction> txn;
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to create txn";
+            return -1;
+        }
+        err = txn->get(begin, end, &it);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to get mow tablet compaction key, err=" << 
err;
+            return -1;
+        }
+        int64_t now = duration_cast<std::chrono::seconds>(
+                              
std::chrono::system_clock::now().time_since_epoch())
+                              .count();
+        while (it->has_next() && !stopped()) {
+            auto [k, v] = it->next();
+            std::string_view k1 = k;
+            k1.remove_prefix(1);
+            std::vector<std::tuple<std::variant<int64_t, std::string>, int, 
int>> out;
+            decode_key(&k1, &out);
+            // 0x01 "meta" ${instance_id} "mow_tablet_comp" ${table_id} 
${initiator}
+            auto table_id = std::get<int64_t>(std::get<0>(out[3]));
+            auto initiator = std::get<int64_t>(std::get<0>(out[4]));
+            if (!mow_tablet_compaction.ParseFromArray(v.data(), v.size())) 
[[unlikely]] {
+                LOG(WARNING) << "failed to parse MowTabletCompactionPB";
+                return -1;
+            }
+            int64_t expiration = mow_tablet_compaction.expiration();
+            //check compaction key failed should meet both following two 
condition:
+            //1.compaction key is expired
+            //2.table lock key is not found or key is not expired
+            if (expiration < now - 
config::compaction_key_check_expiration_diff_seconds) {
+                std::string lock_key =
+                        meta_delete_bitmap_update_lock_key({instance_id_, 
table_id, -1});
+                std::string lock_val;
+                err = txn->get(lock_key, &lock_val);
+                std::string reason = "";
+                if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+                    reason = "table lock key not found";
+
+                } else {
+                    DeleteBitmapUpdateLockPB lock_info;
+                    if (!lock_info.ParseFromString(lock_val)) [[unlikely]] {
+                        LOG(WARNING) << "failed to parse 
DeleteBitmapUpdateLockPB";
+                        return -1;
+                    }
+                    if (lock_info.expiration() > now) {

Review Comment:
   升级过程中可能出现compaction key残留(compaction在新ms上请求到锁,却到旧ms上释放锁并失败,导致compaction key残留)。
   接下来会出现几个可能的场景:
   a.导入抢到锁,抢锁过程中,会检查并删除过期的compaction key,锁没过期
   b.导入抢到锁,抢锁过程中,会检查并删除过期的compaction key,执行完导入,并释放锁,此时锁不存在
   c.导入抢到锁并释放锁之后,lock key不存在;随后compaction抢到锁,抢锁过程中会检查并删除过期的compaction key,此时锁没过期
   d.compaction持续抢到锁,但是锁的过期时间已经不会更新(假设所有ms都走新路径),这个时候compaction key依然残留
   综上,在a、b、c这三种场景下,过期的compaction key都应该会被删掉;如果依然存在过期的compaction key,说明删除逻辑没有生效。
   所以在checker这里,只要出现锁没过期或者锁不存在的场景,同时又存在过期的compaction key,就说明删除执行得有问题。



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to