This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 9124197bf73 [feat](cloud) Add a config for cloud txn lazy commit fuzzy
test (#50314) (#50544)
9124197bf73 is described below
commit 9124197bf73947059112000f524d62e7efbdefd2
Author: Lei Zhang <[email protected]>
AuthorDate: Wed Apr 30 15:58:14 2025 +0800
[feat](cloud) Add a config for cloud txn lazy commit fuzzy test (#50314)
(#50544)
* When `enable_cloud_txn_lazy_commit_fuzzy_test=true` is set, `commit_txn`
takes the `commit_txn_eventually` path roughly fifty percent of the time.
---
cloud/src/common/config.h | 2 ++
cloud/src/meta-service/meta_service_txn.cpp | 40 ++++++++++++++++++----
cloud/test/txn_lazy_commit_test.cpp | 35 +++++++++++++++++++
.../pipeline/cloud_p0/conf/ms_custom.conf | 1 +
.../pipeline/cloud_p1/conf/ms_custom.conf | 1 +
5 files changed, 72 insertions(+), 7 deletions(-)
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index 25e52e01a75..70d8ca8849f 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -262,6 +262,8 @@ CONF_Int32(txn_lazy_max_rowsets_per_batch, "1000");
// max TabletIndexPB num for batch get
CONF_Int32(max_tablet_index_num_per_batch, "1000");
+CONF_Bool(enable_cloud_txn_lazy_commit_fuzzy_test, "false");
+
CONF_Bool(enable_check_instance_id, "true");
// Check if ip eq 127.0.0.1, ms/recycler exit
diff --git a/cloud/src/meta-service/meta_service_txn.cpp
b/cloud/src/meta-service/meta_service_txn.cpp
index b251ec64ed0..21e7dbe1a0c 100644
--- a/cloud/src/meta-service/meta_service_txn.cpp
+++ b/cloud/src/meta-service/meta_service_txn.cpp
@@ -1703,7 +1703,7 @@ void commit_txn_eventually(
DCHECK(txn_info.txn_id() == txn_id);
if (txn_info.status() == TxnStatusPB::TXN_STATUS_ABORTED) {
code = MetaServiceCode::TXN_ALREADY_ABORTED;
- ss << "transaction is already aborted: db_id=" << db_id << "
txn_id=" << txn_id;
+ ss << "transaction [" << txn_id << "] is already aborted, db_id="
<< db_id;
msg = ss.str();
LOG(WARNING) << msg;
return;
@@ -2546,6 +2546,17 @@ void commit_txn_with_sub_txn(const CommitTxnRequest*
request, CommitTxnResponse*
response->mutable_txn_info()->CopyFrom(txn_info);
} // end commit_txn_with_sub_txn
+static bool fuzzy_random() { // Pseudo coin flip: true when the current steady_clock tick count is odd.
+ return std::chrono::steady_clock::now().time_since_epoch().count() & 0x01; // keeps only the lowest bit; NOTE(review): an even split presumably needs a fine-grained clock tick -- confirm on target platforms
+}
+
+static bool force_txn_lazy_commit() { // Tells the caller whether fuzzy testing forces this call onto the lazy-commit path.
+ if (config::enable_cloud_txn_lazy_commit_fuzzy_test) [[unlikely]] { // switch defaults to "false", hence [[unlikely]]
+ return fuzzy_random(); // fuzzy mode: the coin flip decides
+ }
+ return false; // switch off: never force
+}
+
void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller,
const CommitTxnRequest* request,
CommitTxnResponse* response,
::google::protobuf::Closure* done) {
@@ -2581,26 +2592,41 @@ void
MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller,
}
TxnErrorCode err = TxnErrorCode::TXN_OK;
- bool allow_txn_lazy_commit =
+ bool enable_txn_lazy_commit_feature =
(request->has_is_2pc() && !request->is_2pc() &&
request->has_enable_txn_lazy_commit() &&
request->enable_txn_lazy_commit() &&
config::enable_cloud_txn_lazy_commit);
- if (!allow_txn_lazy_commit ||
- (tmp_rowsets_meta.size() <= config::txn_lazy_commit_rowsets_thresold))
{
+ while ((!enable_txn_lazy_commit_feature ||
+ (tmp_rowsets_meta.size() <=
config::txn_lazy_commit_rowsets_thresold))) {
+ if (force_txn_lazy_commit()) {
+ LOG(INFO) << "fuzzy test force_txn_lazy_commit, txn_id=" << txn_id;
+ break;
+ }
+
commit_txn_immediately(request, response, txn_kv_,
txn_lazy_committer_, code, msg,
instance_id, db_id, tmp_rowsets_meta, err);
- if ((MetaServiceCode::OK == code) ||
(TxnErrorCode::TXN_BYTES_TOO_LARGE != err) ||
- !allow_txn_lazy_commit) {
+
+ if (MetaServiceCode::OK == code) {
+ return;
+ }
+
+ if (TxnErrorCode::TXN_BYTES_TOO_LARGE != err) {
+ return;
+ }
+
+ if (!enable_txn_lazy_commit_feature) {
if (err == TxnErrorCode::TXN_BYTES_TOO_LARGE) {
msg += ", likely due to committing too many tablets. "
"Please reduce the number of partitions involved in the
load.";
}
return;
}
+
DCHECK(code != MetaServiceCode::OK);
- DCHECK(allow_txn_lazy_commit);
+ DCHECK(enable_txn_lazy_commit_feature);
DCHECK(err == TxnErrorCode::TXN_BYTES_TOO_LARGE);
LOG(INFO) << "txn_id=" << txn_id << " fallthrough
commit_txn_eventually";
+ break;
}
LOG(INFO) << "txn_id=" << txn_id << " commit_txn_eventually"
diff --git a/cloud/test/txn_lazy_commit_test.cpp
b/cloud/test/txn_lazy_commit_test.cpp
index 0f284508a3f..5737e6e9eef 100644
--- a/cloud/test/txn_lazy_commit_test.cpp
+++ b/cloud/test/txn_lazy_commit_test.cpp
@@ -41,6 +41,7 @@
#include "meta-service/mem_txn_kv.h"
#include "meta-service/meta_service.h"
#include "meta-service/meta_service_helper.h"
+#include "meta-service/meta_service_txn.cpp"
#include "meta-service/txn_kv_error.h"
#include "mock_resource_manager.h"
#include "rate-limiter/rate_limiter.h"
@@ -1899,5 +1900,39 @@ TEST(TxnLazyCommitTest, RowsetMetaSizeExceedTest) {
ASSERT_EQ(res.status().code(), MetaServiceCode::PROTOBUF_PARSE_ERR);
}
}
+TEST(TxnLazyCommitTest, FuzzyRandom) { // Calls fuzzy_random() 100000 times and checks how often it returns true.
+ int counter = 0; // number of calls that returned true
+ for (size_t i = 0; i < 100000; i++) {
+ if (fuzzy_random()) {
+ counter++;
+ }
+ }
+ LOG(INFO) << "fuzzy_random counter: " << counter;
+ ASSERT_GT(counter, 30000); // loose lower bound: more than 30% of the calls
+ ASSERT_LT(counter, 70000); // loose upper bound: fewer than 70% of the calls
+}
+// Exercises force_txn_lazy_commit() under both settings of
+// config::enable_cloud_txn_lazy_commit_fuzzy_test: with the switch off it must
+// never return true; with the switch on it must return true for more than
+// 30000 but fewer than 70000 of 100000 calls.
+TEST(TxnLazyCommitTest, ForceTxnLazyCommit) {
+    int counter = 0;
+    config::enable_cloud_txn_lazy_commit_fuzzy_test = false;
+    for (size_t i = 0; i < 100000; i++) {
+        if (force_txn_lazy_commit()) {
+            counter++;
+        }
+    }
+    LOG(INFO) << "force_txn_lazy_commit counter: " << counter;
+    ASSERT_EQ(counter, 0);
+
+    config::enable_cloud_txn_lazy_commit_fuzzy_test = true;
+    counter = 0;
+    for (size_t i = 0; i < 100000; i++) {
+        if (force_txn_lazy_commit()) {
+            counter++;
+        }
+    }
+    // Switch the global flag back off BEFORE asserting: a failing ASSERT_*
+    // returns from the test body immediately, and a flag left on would leak
+    // into whichever tests run afterwards in this process.
+    config::enable_cloud_txn_lazy_commit_fuzzy_test = false;
+    LOG(INFO) << "force_txn_lazy_commit counter: " << counter;
+    ASSERT_GT(counter, 30000);
+    ASSERT_LT(counter, 70000);
+}
} // namespace doris::cloud
diff --git a/regression-test/pipeline/cloud_p0/conf/ms_custom.conf
b/regression-test/pipeline/cloud_p0/conf/ms_custom.conf
index 1c88dc87938..39b9d6c348d 100644
--- a/regression-test/pipeline/cloud_p0/conf/ms_custom.conf
+++ b/regression-test/pipeline/cloud_p0/conf/ms_custom.conf
@@ -1,2 +1,3 @@
# below lines will be appended to the default doris_cloud.conf when deploying
meta service
meta_schema_value_version = 1
+enable_cloud_txn_lazy_commit_fuzzy_test = true
diff --git a/regression-test/pipeline/cloud_p1/conf/ms_custom.conf
b/regression-test/pipeline/cloud_p1/conf/ms_custom.conf
index d5056833bac..8a2f8734fb4 100644
--- a/regression-test/pipeline/cloud_p1/conf/ms_custom.conf
+++ b/regression-test/pipeline/cloud_p1/conf/ms_custom.conf
@@ -1 +1,2 @@
# below lines will be appended to the default doris_cloud.conf when deploying
meta service
+enable_cloud_txn_lazy_commit_fuzzy_test = true
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]