This is an automated email from the ASF dual-hosted git repository.
zhangchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 840503e8d58 [improve](cloud-mow) reduce ms update_delete_bitmap kv
conflict (#47375)
840503e8d58 is described below
commit 840503e8d584b53635b92ead247ac02b3772d48b
Author: meiyi <[email protected]>
AuthorDate: Thu Jun 5 14:51:56 2025 +0800
[improve](cloud-mow) reduce ms update_delete_bitmap kv conflict (#47375)
Problem Summary:
1. `update_delete_bitmap` may be split into several transactions so that
the delete bitmap size does not exceed the fdb transaction limit
2. multiple compaction jobs change the initiators of the lock_info,
which causes txn_conflict in `update_delete_bitmap`.
3. for an update split across multiple transactions, the txn_conflict error is
more likely to happen; even after some retries, `update_delete_bitmap`
can still fail
4. the root cause is that multiple compactions should not conflict with each
other; PR 48024 solves this
5. but branch-3.0 does not contain PR 48024, so change the lock_id check
to a snapshot read to avoid txn_conflict. If the lock_id has changed, the final
`commit_txn` or `commit_job` can handle it
---
cloud/src/meta-service/meta_service.cpp | 14 +++++++++-----
cloud/src/meta-service/meta_service.h | 2 ++
cloud/src/meta-service/meta_service_job.cpp | 3 ---
3 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/cloud/src/meta-service/meta_service.cpp
b/cloud/src/meta-service/meta_service.cpp
index 36b32063a96..45e64f475f6 100644
--- a/cloud/src/meta-service/meta_service.cpp
+++ b/cloud/src/meta-service/meta_service.cpp
@@ -1749,10 +1749,12 @@ void
MetaServiceImpl::get_tablet_stats(::google::protobuf::RpcController* contro
static bool check_delete_bitmap_lock(MetaServiceCode& code, std::string& msg,
std::stringstream& ss,
std::unique_ptr<Transaction>& txn,
int64_t table_id,
int64_t lock_id, int64_t lock_initiator,
std::string& lock_key,
- DeleteBitmapUpdateLockPB& lock_info) {
+ DeleteBitmapUpdateLockPB& lock_info,
+ bool snapshot_read = false) {
std::string lock_val;
- LOG(INFO) << "check_delete_bitmap_lock, table_id=" << table_id << " key="
<< hex(lock_key);
- auto err = txn->get(lock_key, &lock_val);
+ LOG(INFO) << "check_delete_bitmap_lock, table_id=" << table_id << " key="
<< hex(lock_key)
+ << ", lock_id=" << lock_id << ", snapshot_read=" <<
snapshot_read;
+ auto err = txn->get(lock_key, &lock_val, snapshot_read);
TEST_SYNC_POINT_CALLBACK("check_delete_bitmap_lock.inject_get_lock_key_err",
&err);
if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
msg = "lock id key not found";
@@ -1963,12 +1965,13 @@ void
MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
}
bool unlock = request->has_unlock() ? request->unlock() : false;
+ bool snapshot_read = request->lock_id() ==
COMPACTION_DELETE_BITMAP_LOCK_ID;
if (!unlock) {
// 1. Check whether the lock expires
std::string lock_key =
meta_delete_bitmap_update_lock_key({instance_id, table_id, -1});
DeleteBitmapUpdateLockPB lock_info;
if (!check_delete_bitmap_lock(code, msg, ss, txn, table_id,
request->lock_id(),
- request->initiator(), lock_key,
lock_info)) {
+ request->initiator(), lock_key,
lock_info, snapshot_read)) {
LOG(WARNING) << "failed to check delete bitmap lock, table_id=" <<
table_id
<< " request lock_id=" << request->lock_id()
<< " request initiator=" << request->initiator() << "
msg " << msg;
@@ -2079,7 +2082,8 @@ void
MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
meta_delete_bitmap_update_lock_key({instance_id,
table_id, -1});
DeleteBitmapUpdateLockPB lock_info;
if (!check_delete_bitmap_lock(code, msg, ss, txn, table_id,
request->lock_id(),
- request->initiator(), lock_key,
lock_info)) {
+ request->initiator(), lock_key,
lock_info,
+ snapshot_read)) {
LOG(WARNING) << "failed to check delete bitmap lock,
table_id=" << table_id
<< " request lock_id=" << request->lock_id()
<< " request initiator=" <<
request->initiator() << " msg " << msg;
diff --git a/cloud/src/meta-service/meta_service.h
b/cloud/src/meta-service/meta_service.h
index 6df09bd2c20..57f88d51dfe 100644
--- a/cloud/src/meta-service/meta_service.h
+++ b/cloud/src/meta-service/meta_service.h
@@ -39,6 +39,8 @@ namespace doris::cloud {
class Transaction;
constexpr std::string_view BUILT_IN_STORAGE_VAULT_NAME =
"built_in_storage_vault";
+static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID = -1;
+static constexpr int SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID = -2;
void internal_get_rowset(Transaction* txn, int64_t start, int64_t end,
const std::string& instance_id, int64_t tablet_id,
MetaServiceCode& code,
diff --git a/cloud/src/meta-service/meta_service_job.cpp
b/cloud/src/meta-service/meta_service_job.cpp
index 29f1c9993fd..3dd89afbb61 100644
--- a/cloud/src/meta-service/meta_service_job.cpp
+++ b/cloud/src/meta-service/meta_service_job.cpp
@@ -46,9 +46,6 @@ static inline constexpr size_t get_file_name_offset(const T
(&s)[S], size_t i =
namespace doris::cloud {
-static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID = -1;
-static constexpr int SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID = -2;
-
// check compaction input_versions are valid during schema change.
// If the schema change job doesnt have alter version, it dont need to check
// because the schema change job is come from old version BE.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]