This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 80776ef5c78 [Opt](profile) More profile for
`OlapScanLocalState::hold_tablets` (#50564)
80776ef5c78 is described below
commit 80776ef5c78fa171303d0c446c7e2fa40644a549
Author: bobhan1 <[email protected]>
AuthorDate: Thu May 8 23:45:27 2025 +0800
[Opt](profile) More profile for `OlapScanLocalState::hold_tablets` (#50564)
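Add four new profile counters under `SyncRowsetTime`:
`SyncRowsetTabletMetaCacheHitCount`/`SyncRowsetTabletMetaCacheMissCount`/`SyncRowsetGetRemoteTabletMetaRpcTime`/`SyncRowsetTabletsRowsetsTotatCount`
Also report the existing remote rowsets / delete bitmap RPC durations as
nanosecond timers (`SyncRowsetGetRemoteRowsetsRpcTime`,
`SyncRowsetGetRemoteDeleteBitmapRpcTime`) instead of millisecond counters
(`...RpcMs`).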
---
be/src/cloud/cloud_meta_mgr.cpp | 14 ++++++-----
be/src/cloud/cloud_tablet.h | 8 +++++--
be/src/cloud/cloud_tablet_mgr.cpp | 15 +++++++++++-
be/src/pipeline/exec/olap_scan_operator.cpp | 36 ++++++++++++++++++++++-------
be/src/pipeline/exec/olap_scan_operator.h | 4 ++++
5 files changed, 60 insertions(+), 17 deletions(-)
diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index ab143d88c64..b9ca0e8c16f 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -532,8 +532,9 @@ Status
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
? GetRowsetRequest::NO_DICT
: GetRowsetRequest::RETURN_DICT);
VLOG_DEBUG << "send GetRowsetRequest: " << req.ShortDebugString();
-
+ auto start = std::chrono::steady_clock::now();
stub->get_rowset(&cntl, &req, &resp, nullptr);
+ auto end = std::chrono::steady_clock::now();
int64_t latency = cntl.latency_us();
_get_rowset_latency << latency;
int retry_times = config::meta_service_rpc_retry_times;
@@ -576,7 +577,8 @@ Status
CloudMetaMgr::sync_tablet_rowsets_unlocked(CloudTablet* tablet,
tablet->last_sync_time_s = now;
if (sync_stats) {
- sync_stats->get_remote_rowsets_rpc_ms += latency / 1000;
+ sync_stats->get_remote_rowsets_rpc_ns +=
+ std::chrono::duration_cast<std::chrono::nanoseconds>(end -
start).count();
sync_stats->get_remote_rowsets_num += resp.rowset_meta().size();
}
@@ -818,9 +820,9 @@ Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet*
tablet, int64_t old_
VLOG_DEBUG << "send GetDeleteBitmapRequest: " << req.ShortDebugString();
- auto start = std::chrono::high_resolution_clock::now();
+ auto start = std::chrono::steady_clock::now();
auto st = retry_rpc("get delete bitmap", req, &res,
&MetaService_Stub::get_delete_bitmap);
- auto end = std::chrono::high_resolution_clock::now();
+ auto end = std::chrono::steady_clock::now();
if (st.code() == ErrorCode::THRIFT_RPC_ERROR) {
return st;
}
@@ -868,8 +870,8 @@ Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet*
tablet, int64_t old_
rowset_ids.size(), segment_ids.size(), vers.size(),
delete_bitmaps.size());
}
if (sync_stats) {
- sync_stats->get_remote_delete_bitmap_rpc_ms +=
- std::chrono::duration_cast<std::chrono::milliseconds>(end -
start).count();
+ sync_stats->get_remote_delete_bitmap_rpc_ns +=
+ std::chrono::duration_cast<std::chrono::nanoseconds>(end -
start).count();
sync_stats->get_remote_delete_bitmap_key_count +=
delete_bitmaps.size();
for (const auto& dbm : delete_bitmaps) {
sync_stats->get_remote_delete_bitmap_bytes += dbm.length();
diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h
index 7aa287cad89..f9158e1af2f 100644
--- a/be/src/cloud/cloud_tablet.h
+++ b/be/src/cloud/cloud_tablet.h
@@ -28,13 +28,17 @@ class CloudStorageEngine;
struct SyncRowsetStats {
int64_t get_remote_rowsets_num {0};
- int64_t get_remote_rowsets_rpc_ms {0};
+ int64_t get_remote_rowsets_rpc_ns {0};
int64_t get_local_delete_bitmap_rowsets_num {0};
int64_t get_remote_delete_bitmap_rowsets_num {0};
int64_t get_remote_delete_bitmap_key_count {0};
int64_t get_remote_delete_bitmap_bytes {0};
- int64_t get_remote_delete_bitmap_rpc_ms {0};
+ int64_t get_remote_delete_bitmap_rpc_ns {0};
+
+ int64_t get_remote_tablet_meta_rpc_ns {0};
+ int64_t tablet_meta_cache_hit {0};
+ int64_t tablet_meta_cache_miss {0};
};
struct SyncOptions {
diff --git a/be/src/cloud/cloud_tablet_mgr.cpp
b/be/src/cloud/cloud_tablet_mgr.cpp
index eae728f9526..c9d3696420b 100644
--- a/be/src/cloud/cloud_tablet_mgr.cpp
+++ b/be/src/cloud/cloud_tablet_mgr.cpp
@@ -19,6 +19,8 @@
#include <bthread/countdown_event.h>
+#include <chrono>
+
#include "cloud/cloud_meta_mgr.h"
#include "cloud/cloud_storage_engine.h"
#include "cloud/cloud_tablet.h"
@@ -169,10 +171,19 @@ Result<std::shared_ptr<CloudTablet>>
CloudTabletMgr::get_tablet(int64_t tablet_i
CacheKey key(tablet_id_str);
auto* handle = _cache->lookup(key);
if (handle == nullptr) {
+ if (sync_stats) {
+ ++sync_stats->tablet_meta_cache_miss;
+ }
auto load_tablet = [this, &key, warmup_data, sync_delete_bitmap,
sync_stats](int64_t tablet_id) ->
std::shared_ptr<CloudTablet> {
TabletMetaSharedPtr tablet_meta;
+ auto start = std::chrono::steady_clock::now();
auto st = _engine.meta_mgr().get_tablet_meta(tablet_id,
&tablet_meta);
+ auto end = std::chrono::steady_clock::now();
+ if (sync_stats) {
+ sync_stats->get_remote_tablet_meta_rpc_ns +=
+
std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
+ }
if (!st.ok()) {
LOG(WARNING) << "failed to tablet " << tablet_id << ": " << st;
return nullptr;
@@ -208,7 +219,9 @@ Result<std::shared_ptr<CloudTablet>>
CloudTabletMgr::get_tablet(int64_t tablet_i
set_tablet_access_time_ms(tablet.get());
return tablet;
}
-
+ if (sync_stats) {
+ ++sync_stats->tablet_meta_cache_hit;
+ }
CloudTablet* tablet_raw_ptr =
reinterpret_cast<Value*>(_cache->value(handle))->tablet.get();
set_tablet_access_time_ms(tablet_raw_ptr);
auto tablet = std::shared_ptr<CloudTablet>(tablet_raw_ptr, [this,
handle](CloudTablet* tablet) {
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp
b/be/src/pipeline/exec/olap_scan_operator.cpp
index 0a37cc9e465..58a7730e8e5 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -20,6 +20,7 @@
#include <fmt/format.h>
#include <memory>
+#include <numeric>
#include "cloud/cloud_meta_mgr.h"
#include "cloud/cloud_storage_engine.h"
@@ -68,12 +69,22 @@ Status OlapScanLocalState::_init_profile() {
if (config::is_cloud_mode()) {
static const char* sync_rowset_timer_name = "SyncRowsetTime";
_sync_rowset_timer = ADD_TIMER(_scanner_profile,
sync_rowset_timer_name);
+ _sync_rowset_tablet_meta_cache_hit =
+ ADD_CHILD_COUNTER(_scanner_profile,
"SyncRowsetTabletMetaCacheHitCount",
+ TUnit::UNIT, sync_rowset_timer_name);
+ _sync_rowset_tablet_meta_cache_miss =
+ ADD_CHILD_COUNTER(_scanner_profile,
"SyncRowsetTabletMetaCacheMissCount",
+ TUnit::UNIT, sync_rowset_timer_name);
+ _sync_rowset_get_remote_tablet_meta_rpc_timer = ADD_CHILD_TIMER(
+ _scanner_profile, "SyncRowsetGetRemoteTabletMetaRpcTime",
sync_rowset_timer_name);
+ _sync_rowset_tablets_rowsets_total_num =
+ ADD_CHILD_COUNTER(_scanner_profile,
"SyncRowsetTabletsRowsetsTotatCount",
+ TUnit::UNIT, sync_rowset_timer_name);
_sync_rowset_get_remote_rowsets_num =
ADD_CHILD_COUNTER(_scanner_profile,
"SyncRowsetGetRemoteRowsetsCount", TUnit::UNIT,
sync_rowset_timer_name);
- _sync_rowset_get_remote_rowsets_rpc_timer =
- ADD_CHILD_COUNTER(_scanner_profile,
"SyncRowsetGetRemoteRowsetsRpcMs",
- TUnit::TIME_MS, sync_rowset_timer_name);
+ _sync_rowset_get_remote_rowsets_rpc_timer = ADD_CHILD_TIMER(
+ _scanner_profile, "SyncRowsetGetRemoteRowsetsRpcTime",
sync_rowset_timer_name);
_sync_rowset_get_local_delete_bitmap_rowsets_num =
ADD_CHILD_COUNTER(_scanner_profile,
"SyncRowsetGetLocalDeleteBitmapRowsetsCount",
TUnit::UNIT, sync_rowset_timer_name);
@@ -86,9 +97,8 @@ Status OlapScanLocalState::_init_profile() {
_sync_rowset_get_remote_delete_bitmap_bytes =
ADD_CHILD_COUNTER(_scanner_profile,
"SyncRowsetGetRemoteDeleteBitmapBytes",
TUnit::BYTES, sync_rowset_timer_name);
- _sync_rowset_get_remote_delete_bitmap_rpc_timer =
- ADD_CHILD_COUNTER(_scanner_profile,
"SyncRowsetGetRemoteDeleteBitmapRpcMs",
- TUnit::TIME_MS, sync_rowset_timer_name);
+ _sync_rowset_get_remote_delete_bitmap_rpc_timer = ADD_CHILD_TIMER(
+ _scanner_profile, "SyncRowsetGetRemoteDeleteBitmapRpcTime",
sync_rowset_timer_name);
}
_block_init_timer = ADD_TIMER(_segment_profile, "BlockInitTime");
_block_init_seek_timer = ADD_TIMER(_segment_profile, "BlockInitSeekTime");
@@ -478,10 +488,20 @@ Status OlapScanLocalState::hold_tablets() {
cloud::bthread_fork_join(tasks,
config::init_scanner_sync_rowsets_parallelism));
}
COUNTER_UPDATE(_sync_rowset_timer, duration_ns);
+ auto total_rowsets = std::accumulate(
+ _tablets.cbegin(), _tablets.cend(), 0LL,
+ [](long long acc, const auto& tabletWithVersion) {
+ return acc +
tabletWithVersion.tablet->tablet_meta()->all_rs_metas().size();
+ });
+ COUNTER_UPDATE(_sync_rowset_tablets_rowsets_total_num, total_rowsets);
for (const auto& sync_stats : sync_statistics) {
+ COUNTER_UPDATE(_sync_rowset_tablet_meta_cache_hit,
sync_stats.tablet_meta_cache_hit);
+ COUNTER_UPDATE(_sync_rowset_tablet_meta_cache_miss,
sync_stats.tablet_meta_cache_miss);
+ COUNTER_UPDATE(_sync_rowset_get_remote_tablet_meta_rpc_timer,
+ sync_stats.get_remote_tablet_meta_rpc_ns);
COUNTER_UPDATE(_sync_rowset_get_remote_rowsets_num,
sync_stats.get_remote_rowsets_num);
COUNTER_UPDATE(_sync_rowset_get_remote_rowsets_rpc_timer,
- sync_stats.get_remote_rowsets_rpc_ms);
+ sync_stats.get_remote_rowsets_rpc_ns);
COUNTER_UPDATE(_sync_rowset_get_local_delete_bitmap_rowsets_num,
sync_stats.get_local_delete_bitmap_rowsets_num);
COUNTER_UPDATE(_sync_rowset_get_remote_delete_bitmap_rowsets_num,
@@ -491,7 +511,7 @@ Status OlapScanLocalState::hold_tablets() {
COUNTER_UPDATE(_sync_rowset_get_remote_delete_bitmap_bytes,
sync_stats.get_remote_delete_bitmap_bytes);
COUNTER_UPDATE(_sync_rowset_get_remote_delete_bitmap_rpc_timer,
- sync_stats.get_remote_delete_bitmap_rpc_ms);
+ sync_stats.get_remote_delete_bitmap_rpc_ns);
}
} else {
for (size_t i = 0; i < _scan_ranges.size(); i++) {
diff --git a/be/src/pipeline/exec/olap_scan_operator.h
b/be/src/pipeline/exec/olap_scan_operator.h
index 78a4f847e09..85febfa5286 100644
--- a/be/src/pipeline/exec/olap_scan_operator.h
+++ b/be/src/pipeline/exec/olap_scan_operator.h
@@ -132,6 +132,10 @@ private:
RuntimeProfile::Counter* _block_fetch_timer = nullptr;
RuntimeProfile::Counter* _delete_bitmap_get_agg_timer = nullptr;
RuntimeProfile::Counter* _sync_rowset_timer = nullptr;
+ RuntimeProfile::Counter* _sync_rowset_get_remote_tablet_meta_rpc_timer =
nullptr;
+ RuntimeProfile::Counter* _sync_rowset_tablet_meta_cache_hit = nullptr;
+ RuntimeProfile::Counter* _sync_rowset_tablet_meta_cache_miss = nullptr;
+ RuntimeProfile::Counter* _sync_rowset_tablets_rowsets_total_num = nullptr;
RuntimeProfile::Counter* _sync_rowset_get_remote_rowsets_num = nullptr;
RuntimeProfile::Counter* _sync_rowset_get_remote_rowsets_rpc_timer =
nullptr;
RuntimeProfile::Counter* _sync_rowset_get_local_delete_bitmap_rowsets_num
= nullptr;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]