This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 9b1e935d641 branch-3.1: [opt](storage) Add log and metric when
aws/azure sdk do retry operation #51485 (#52025)
9b1e935d641 is described below
commit 9b1e935d641df9f1c54c2fdafeb64af5b37f7e43
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jun 20 17:15:13 2025 +0800
branch-3.1: [opt](storage) Add log and metric when aws/azure sdk do retry
operation #51485 (#52025)
Cherry-picked from #51485
Co-authored-by: Lei Zhang <[email protected]>
---
be/src/common/config.cpp | 13 ++++++++-
be/src/common/config.h | 7 +++++
be/src/util/s3_util.cpp | 36 ++++++++++++++++++++++-
be/test/io/fs/azure_obj_storage_client_test.cpp | 18 +++++++-----
cloud/src/common/config.h | 12 +++++++-
cloud/src/recycler/s3_accessor.cpp | 31 ++++++++++++++++++--
common/cpp/aws_logger.h | 2 +-
common/cpp/obj_retry_strategy.cpp | 38 ++++++++++++++++---------
common/cpp/obj_retry_strategy.h | 8 ++----
conf/be.conf | 9 +++++-
10 files changed, 141 insertions(+), 33 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 04e04aa4b4b..4ce28d89f0e 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -839,7 +839,18 @@ DEFINE_mInt32(zone_map_row_num_threshold, "20");
// Info = 4,
// Debug = 5,
// Trace = 6
-DEFINE_Int32(aws_log_level, "2");
+DEFINE_Int32(aws_log_level, "3");
+DEFINE_Validator(aws_log_level,
+ [](const int config) -> bool { return config >= 0 && config
<= 6; });
+
+// azure sdk log level
+// Verbose = 1,
+// Informational = 2,
+// Warning = 3,
+// Error = 4
+DEFINE_Int32(azure_log_level, "3");
+DEFINE_Validator(azure_log_level,
+ [](const int config) -> bool { return config >= 1 && config
<= 4; });
// the buffer size when read data from remote storage like s3
DEFINE_mInt32(remote_storage_read_buffer_mb, "16");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 51a275a55b8..fc4377254f8 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -885,6 +885,13 @@ DECLARE_mInt32(zone_map_row_num_threshold);
// Trace = 6
DECLARE_Int32(aws_log_level);
+// azure sdk log level
+// Verbose = 1,
+// Informational = 2,
+// Warning = 3,
+// Error = 4
+DECLARE_Int32(azure_log_level);
+
// the buffer size when read data from remote storage like s3
DECLARE_mInt32(remote_storage_read_buffer_mb);
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index ce8d9c2d6c2..f6505583337 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -32,6 +32,7 @@
#include <atomic>
#ifdef USE_AZURE
+#include <azure/core/diagnostics/logger.hpp>
#include <azure/storage/blobs/blob_container_client.hpp>
#endif
#include <cstdlib>
@@ -150,6 +151,33 @@ S3ClientFactory::S3ClientFactory() {
config::s3_put_token_per_second,
config::s3_put_bucket_tokens,
config::s3_put_token_limit,
metric_func_factory(put_rate_limit_ns,
put_rate_limit_exceed_req_num))};
+
+#ifdef USE_AZURE
+ auto azureLogLevel =
+
static_cast<Azure::Core::Diagnostics::Logger::Level>(config::azure_log_level);
+ Azure::Core::Diagnostics::Logger::SetLevel(azureLogLevel);
+ Azure::Core::Diagnostics::Logger::SetListener(
+ [&](Azure::Core::Diagnostics::Logger::Level level, const
std::string& message) {
+ switch (level) {
+ case Azure::Core::Diagnostics::Logger::Level::Verbose:
+ LOG(INFO) << message;
+ break;
+ case Azure::Core::Diagnostics::Logger::Level::Informational:
+ LOG(INFO) << message;
+ break;
+ case Azure::Core::Diagnostics::Logger::Level::Warning:
+ LOG(WARNING) << message;
+ break;
+ case Azure::Core::Diagnostics::Logger::Level::Error:
+ LOG(ERROR) << message;
+ break;
+ default:
+ LOG(WARNING) << "Unknown level: " <<
static_cast<int>(level)
+ << ", message: " << message;
+ break;
+ }
+ });
+#endif
}
S3ClientFactory::~S3ClientFactory() {
@@ -204,7 +232,13 @@ std::shared_ptr<io::ObjStorageClient>
S3ClientFactory::_create_azure_client(
}
}
- auto containerClient =
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(uri, cred);
+ Azure::Storage::Blobs::BlobClientOptions options;
+
options.Retry.StatusCodes.insert(Azure::Core::Http::HttpStatusCode::TooManyRequests);
+ options.Retry.MaxRetries = config::max_s3_client_retry;
+
options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>());
+
+ auto containerClient =
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(
+ uri, cred, std::move(options));
LOG_INFO("create one azure client with {}", s3_conf.to_string());
return
std::make_shared<io::AzureObjStorageClient>(std::move(containerClient));
#else
diff --git a/be/test/io/fs/azure_obj_storage_client_test.cpp
b/be/test/io/fs/azure_obj_storage_client_test.cpp
index 1297e7c75f7..4ef1770128c 100644
--- a/be/test/io/fs/azure_obj_storage_client_test.cpp
+++ b/be/test/io/fs/azure_obj_storage_client_test.cpp
@@ -21,6 +21,7 @@
#include "io/fs/file_system.h"
#include "io/fs/obj_storage_client.h"
+#include "util/s3_util.h"
#ifdef USE_AZURE
#include <azure/storage/blobs.hpp>
@@ -49,13 +50,16 @@ protected:
std::string accountKey = std::getenv("AZURE_ACCOUNT_KEY");
std::string containerName = std::getenv("AZURE_CONTAINER_NAME");
- auto cred =
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(accountName,
-
accountKey);
- const std::string uri =
- fmt::format("https://{}.blob.core.windows.net/{}",
accountName, containerName);
- auto containerClient = std::make_shared<BlobContainerClient>(uri,
cred);
- AzureObjStorageClientTest::obj_storage_client =
-
std::make_shared<io::AzureObjStorageClient>(std::move(containerClient));
+ // Initialize Azure SDK
+ [[maybe_unused]] auto& s3ClientFactory = S3ClientFactory::instance();
+
+ AzureObjStorageClientTest::obj_storage_client =
S3ClientFactory::instance().create(
+ {.endpoint = fmt::format("https://{}.blob.core.windows.net",
accountName),
+ .region = "dummy-region",
+ .ak = accountName,
+ .sk = accountKey,
+ .bucket = containerName,
+ .provider = io::ObjStorageType::AZURE});
}
void SetUp() override {
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index 857b4292c35..84d889a059e 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -285,7 +285,17 @@ CONF_Strings(recycler_storage_vault_white_list, "");
// Info = 4,
// Debug = 5,
// Trace = 6
-CONF_Int32(aws_log_level, "2");
+CONF_Int32(aws_log_level, "3");
+CONF_Validator(aws_log_level, [](const int config) -> bool { return config >=
0 && config <= 6; });
+
+// azure sdk log level
+// Verbose = 1,
+// Informational = 2,
+// Warning = 3,
+// Error = 4
+CONF_Int32(azure_log_level, "3");
+CONF_Validator(azure_log_level,
+ [](const int config) -> bool { return config >= 1 && config <=
4; });
// ca_cert_file is in this path by default, Normally no modification is
required
// ca cert default path is different from different OS
diff --git a/cloud/src/recycler/s3_accessor.cpp
b/cloud/src/recycler/s3_accessor.cpp
index ada9a4a1e8c..3c36f5a01bb 100644
--- a/cloud/src/recycler/s3_accessor.cpp
+++ b/cloud/src/recycler/s3_accessor.cpp
@@ -29,6 +29,7 @@
#include <algorithm>
#ifdef USE_AZURE
+#include <azure/core/diagnostics/logger.hpp>
#include <azure/storage/blobs/blob_container_client.hpp>
#include <azure/storage/common/storage_credential.hpp>
#endif
@@ -120,6 +121,33 @@ public:
return std::make_shared<DorisAWSLogger>(logLevel);
};
Aws::InitAPI(aws_options_);
+
+#ifdef USE_AZURE
+ auto azureLogLevel =
+
static_cast<Azure::Core::Diagnostics::Logger::Level>(config::azure_log_level);
+ Azure::Core::Diagnostics::Logger::SetLevel(azureLogLevel);
+ Azure::Core::Diagnostics::Logger::SetListener(
+ [&](Azure::Core::Diagnostics::Logger::Level level, const
std::string& message) {
+ switch (level) {
+ case Azure::Core::Diagnostics::Logger::Level::Verbose:
+ LOG(INFO) << message;
+ break;
+ case
Azure::Core::Diagnostics::Logger::Level::Informational:
+ LOG(INFO) << message;
+ break;
+ case Azure::Core::Diagnostics::Logger::Level::Warning:
+ LOG(WARNING) << message;
+ break;
+ case Azure::Core::Diagnostics::Logger::Level::Error:
+ LOG(ERROR) << message;
+ break;
+ default:
+ LOG(WARNING) << "Unknown level: " <<
static_cast<int>(level)
+ << ", message: " << message;
+ break;
+ }
+ });
+#endif
}
~S3Environment() { Aws::ShutdownAPI(aws_options_); }
@@ -308,8 +336,7 @@ int S3Accessor::init() {
// Within the RetryPolicy, the nextPolicy is called multiple times
inside a loop.
// All policies in the PerRetryPolicies are downstream of the
RetryPolicy.
// Therefore, you only need to add a policy to check if the response
code is 429 and if the retry count meets the condition, it can record the retry
count.
- options.PerRetryPolicies.emplace_back(
-
std::make_unique<AzureRetryRecordPolicy>(config::max_s3_client_retry));
+
options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>());
auto container_client =
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(
uri_, cred, std::move(options));
// uri format for debug:
${scheme}://${ak}.blob.core.windows.net/${bucket}/${prefix}
diff --git a/common/cpp/aws_logger.h b/common/cpp/aws_logger.h
index ca607cab056..85734d13e14 100644
--- a/common/cpp/aws_logger.h
+++ b/common/cpp/aws_logger.h
@@ -19,7 +19,7 @@
#include <aws/core/utils/logging/LogLevel.h>
#include <aws/core/utils/logging/LogSystemInterface.h>
-#include <glog/logging.h> // IWYU pragma: export
+#include <glog/logging.h>
class DorisAWSLogger final : public Aws::Utils::Logging::LogSystemInterface {
public:
diff --git a/common/cpp/obj_retry_strategy.cpp
b/common/cpp/obj_retry_strategy.cpp
index 0226d1af28f..6da4c23980a 100644
--- a/common/cpp/obj_retry_strategy.cpp
+++ b/common/cpp/obj_retry_strategy.cpp
@@ -19,10 +19,11 @@
#include <aws/core/http/HttpResponse.h>
#include <bvar/reducer.h>
+#include <glog/logging.h>
namespace doris {
-bvar::Adder<int64_t>
s3_too_many_request_retry_cnt("s3_too_many_request_retry_cnt");
+bvar::Adder<int64_t> object_request_retry_count("object_request_retry_count");
S3CustomRetryStrategy::S3CustomRetryStrategy(int maxRetries) :
DefaultRetryStrategy(maxRetries) {}
@@ -35,33 +36,42 @@ bool S3CustomRetryStrategy::ShouldRetry(const
Aws::Client::AWSError<Aws::Client:
}
if (Aws::Http::IsRetryableHttpResponseCode(error.GetResponseCode()) ||
error.ShouldRetry()) {
- s3_too_many_request_retry_cnt << 1;
+ object_request_retry_count << 1;
+ LOG(INFO) << "retry due to error: " << error << ", attempt: " <<
attemptedRetries + 1 << "/"
+ << m_maxRetries;
return true;
}
return false;
}
#ifdef USE_AZURE
-AzureRetryRecordPolicy::AzureRetryRecordPolicy(int retry_cnt) :
retry_cnt(retry_cnt) {}
-
-AzureRetryRecordPolicy::~AzureRetryRecordPolicy() = default;
std::unique_ptr<Azure::Core::Http::RawResponse> AzureRetryRecordPolicy::Send(
Azure::Core::Http::Request& request,
Azure::Core::Http::Policies::NextHttpPolicy nextPolicy,
Azure::Core::Context const& context) const {
- auto resp = nextPolicy.Send(request, context);
- if (retry_cnt != 0 &&
- resp->GetStatusCode() ==
Azure::Core::Http::HttpStatusCode::TooManyRequests) {
- retry_cnt--;
- s3_too_many_request_retry_cnt << 1;
+ //
https://learn.microsoft.com/en-us/azure/developer/cpp/sdk/fundamentals/http-pipelines-and-retries
+
+ std::unique_ptr<Azure::Core::Http::RawResponse> response =
nextPolicy.Send(request, context);
+ int32_t retry_count =
+
Azure::Core::Http::Policies::_internal::RetryPolicy::GetRetryCount(context);
+
+ if (static_cast<int>(response->GetStatusCode()) > 299 ||
+ static_cast<int>(response->GetStatusCode()) < 200) {
+ if (retry_count > 0) {
+ object_request_retry_count << 1;
+ }
+
+ // If the response is not successful, we log the retry attempt and
status code.
+ LOG(INFO) << "azure retry retry_count: " << retry_count
+ << ", status code: " <<
static_cast<int>(response->GetStatusCode())
+ << ", reason: " << response->GetReasonPhrase();
}
- return resp;
+
+ return response;
}
std::unique_ptr<AzureRetryRecordPolicy::HttpPolicy>
AzureRetryRecordPolicy::Clone() const {
- auto ret = std::make_unique<AzureRetryRecordPolicy>(*this);
- ret->retry_cnt = 0;
- return ret;
+ return std::make_unique<AzureRetryRecordPolicy>(*this);
}
#endif
} // namespace doris
\ No newline at end of file
diff --git a/common/cpp/obj_retry_strategy.h b/common/cpp/obj_retry_strategy.h
index b081ca91a22..dd98f871716 100644
--- a/common/cpp/obj_retry_strategy.h
+++ b/common/cpp/obj_retry_strategy.h
@@ -37,16 +37,14 @@ public:
#ifdef USE_AZURE
class AzureRetryRecordPolicy final : public
Azure::Core::Http::Policies::HttpPolicy {
public:
- AzureRetryRecordPolicy(int retry_cnt);
- ~AzureRetryRecordPolicy() override;
+ AzureRetryRecordPolicy() = default;
+ ~AzureRetryRecordPolicy() override = default;
+
std::unique_ptr<HttpPolicy> Clone() const override;
std::unique_ptr<Azure::Core::Http::RawResponse> Send(
Azure::Core::Http::Request& request,
Azure::Core::Http::Policies::NextHttpPolicy nextPolicy,
Azure::Core::Context const& context) const override;
-
-private:
- mutable int retry_cnt;
};
#endif
} // namespace doris
\ No newline at end of file
diff --git a/conf/be.conf b/conf/be.conf
index 961b41b51fd..28207c16962 100644
--- a/conf/be.conf
+++ b/conf/be.conf
@@ -88,6 +88,13 @@ sys_log_level = INFO
# Debug = 5,
# Trace = 6
# Default to turn off aws sdk log, because aws sdk errors that need to be
cared will be output through Doris logs
-aws_log_level=2
+aws_log_level = 3
+
+# azure sdk log level
+# Verbose = 1,
+# Informational = 2,
+# Warning = 3,
+# Error = 4
+azure_log_level = 3
## If you are not running in aws cloud, you can disable EC2 metadata
AWS_EC2_METADATA_DISABLED=true
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]