This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 054ef7a1b80 [Fix]fix report query statistics to FE cores (#49711)
054ef7a1b80 is described below
commit 054ef7a1b8033a10bbde3f7c45554fcbe1a81927
Author: wangbo <[email protected]>
AuthorDate: Thu Apr 3 13:33:41 2025 +0800
[Fix]fix report query statistics to FE cores (#49711)
### What problem does this PR solve?
When using thrift to connect to FE, if a TException occurs, the client should
be reopened; otherwise a core dump may occur.
### Release note
None
---
be/src/runtime/runtime_query_statistics_mgr.cpp | 62 ++++++++++++++-----------
1 file changed, 36 insertions(+), 26 deletions(-)
diff --git a/be/src/runtime/runtime_query_statistics_mgr.cpp
b/be/src/runtime/runtime_query_statistics_mgr.cpp
index 0d3c976fedd..eb16cbd003a 100644
--- a/be/src/runtime/runtime_query_statistics_mgr.cpp
+++ b/be/src/runtime/runtime_query_statistics_mgr.cpp
@@ -94,6 +94,14 @@ static Status _do_report_exec_stats_rpc(const
TNetworkAddress& coor_addr,
PrintThriftNetworkAddress(coor_addr), e.what());
}
return Status::RpcError("Send stats failed");
+ } catch (apache::thrift::TException& e) {
+ LOG_WARNING("Failed to report query profile to {}, reason: {} ",
+ PrintThriftNetworkAddress(coor_addr), e.what());
+ std::this_thread::sleep_for(
+
std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2));
+ // just reopen to disable this connection
+ static_cast<void>(rpc_client.reopen(config::thrift_rpc_timeout_ms));
+ return Status::RpcError("Transport exception when report query
profile");
} catch (std::exception& e) {
LOG_WARNING(
"Failed to report query profile to {}, reason: {}, you can see
fe log for details.",
@@ -415,36 +423,38 @@ void
RuntimeQueryStatisticsMgr::report_runtime_query_statistics() {
TReportExecStatusResult res;
Status rpc_status;
try {
- coord->reportExecStatus(res, params);
- rpc_result[addr] = true;
- } catch (apache::thrift::TApplicationException& e) {
- LOG(WARNING) << "[report_query_statistics]fe " << add_str
- << " throw exception when report statistics, reason:"
<< e.what()
- << " , you can see fe log for details.";
- } catch (apache::thrift::transport::TTransportException& e) {
- LOG(WARNING) << "[report_query_statistics]report workload runtime
statistics to "
- << add_str << " failed, reason: " << e.what();
- rpc_status = coord.reopen(config::thrift_rpc_timeout_ms);
- if (!rpc_status.ok()) {
- LOG(WARNING) << "[report_query_statistics]reopen thrift client
failed when report "
- "workload runtime statistics to"
- << add_str;
- } else {
- try {
+ try {
+ coord->reportExecStatus(res, params);
+ rpc_result[addr] = true;
+ } catch (apache::thrift::transport::TTransportException& e) {
+ LOG_WARNING(
+ "[report_query_statistics] report to fe {} failed,
reason:{}, try reopen.",
+ add_str, e.what());
+ rpc_status = coord.reopen(config::thrift_rpc_timeout_ms);
+ if (!rpc_status.ok()) {
+ LOG_WARNING(
+ "[report_query_statistics]reopen thrift client
failed when report "
+ "workload runtime statistics to {}, reason: {}",
+ add_str, rpc_status.to_string());
+ } else {
coord->reportExecStatus(res, params);
rpc_result[addr] = true;
- } catch (apache::thrift::transport::TTransportException& e2) {
- LOG(WARNING)
- << "[report_query_statistics]retry report workload
runtime stats to "
- << add_str << " failed, reason: " << e2.what();
- } catch (std::exception& e) {
- LOG_WARNING(
- "[report_query_statistics]unknow exception when
report workload "
- "runtime statistics to {}, "
- "reason:{}. ",
- add_str, e.what());
}
}
+ } catch (apache::thrift::TApplicationException& e) {
+ LOG_WARNING(
+ "[report_query_statistics]fe {} throw exception when
report statistics, "
+ "reason:{}, you can see fe log for details.",
+ add_str, e.what());
+ } catch (apache::thrift::TException& e) {
+ LOG_WARNING(
+ "[report_query_statistics]report workload runtime
statistics to {} failed, "
+ "reason: {}",
+ add_str, e.what());
+ std::this_thread::sleep_for(
+
std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2));
+ // just reopen to disable this connection
+ static_cast<void>(coord.reopen(config::thrift_rpc_timeout_ms));
} catch (std::exception& e) {
LOG_WARNING(
"[report_query_statistics]unknown exception when report
workload runtime "
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]