This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ca25452330e [fix](filecache) avoid duplicated FileCache counter accumulation in NewOlapScanner (#61072)
ca25452330e is described below

commit ca25452330eca55b6e1533a49066d2a258013817
Author: zhengyu <[email protected]>
AuthorDate: Wed Jun 3 14:34:40 2026 +0800

    [fix](filecache) avoid duplicated FileCache counter accumulation in NewOlapScanner (#61072)
    
    - Decouple realtime FileCache profile updates from the
    local/remote-bytes branch, and update the profile once per cycle when
    file cache is enabled.
    - Reset file_cache_stats as a whole (`= {}`) after realtime reporting,
    instead of resetting only bytes_read_from_local and
    bytes_read_from_remote.
    - Prevent inflated FileCache profile counters caused by repeated
    accumulation of the same statistics (e.g. LockWaitTimer,
    CacheGetOrSetTimer, BytesWriteIntoCache).
---
 be/src/exec/scan/olap_scanner.cpp | 42 ++++++++++++++++++++++++++++++---------
 be/src/exec/scan/olap_scanner.h   |  3 ---
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/be/src/exec/scan/olap_scanner.cpp b/be/src/exec/scan/olap_scanner.cpp
index b051b7a6eb7..33f225864da 100644
--- a/be/src/exec/scan/olap_scanner.cpp
+++ b/be/src/exec/scan/olap_scanner.cpp
@@ -129,6 +129,26 @@ static std::string read_columns_to_string(TabletSchemaSPtr 
tablet_schema,
     return read_columns_string;
 }
 
+static bool has_file_cache_statistics(const io::FileCacheStatistics& stats) {
+    return stats.num_local_io_total != 0 || stats.num_remote_io_total != 0 ||
+           stats.num_peer_io_total != 0 || stats.local_io_timer != 0 ||
+           stats.bytes_read_from_local != 0 || stats.bytes_read_from_remote != 
0 ||
+           stats.bytes_read_from_peer != 0 || stats.remote_io_timer != 0 ||
+           stats.peer_io_timer != 0 || stats.remote_wait_timer != 0 ||
+           stats.write_cache_io_timer != 0 || stats.bytes_write_into_cache != 
0 ||
+           stats.num_skip_cache_io_total != 0 || 
stats.read_cache_file_directly_timer != 0 ||
+           stats.cache_get_or_set_timer != 0 || stats.lock_wait_timer != 0 ||
+           stats.get_timer != 0 || stats.set_timer != 0 ||
+           stats.inverted_index_num_local_io_total != 0 ||
+           stats.inverted_index_num_remote_io_total != 0 ||
+           stats.inverted_index_num_peer_io_total != 0 ||
+           stats.inverted_index_bytes_read_from_local != 0 ||
+           stats.inverted_index_bytes_read_from_remote != 0 ||
+           stats.inverted_index_bytes_read_from_peer != 0 ||
+           stats.inverted_index_local_io_timer != 0 || 
stats.inverted_index_remote_io_timer != 0 ||
+           stats.inverted_index_peer_io_timer != 0 || 
stats.inverted_index_io_timer != 0;
+}
+
 Status OlapScanner::_prepare_impl() {
     auto* local_state = static_cast<OlapScanLocalState*>(_local_state);
     auto& tablet = _tablet_reader_params.tablet;
@@ -707,25 +727,30 @@ void OlapScanner::update_realtime_counters() {
                 stats.compressed_bytes_read);
     } else {
         
_state->get_query_ctx()->resource_ctx()->io_context()->update_scan_bytes_from_local_storage(
-                stats.file_cache_stats.bytes_read_from_local - 
_bytes_read_from_local);
+                stats.file_cache_stats.bytes_read_from_local);
         _state->get_query_ctx()
                 ->resource_ctx()
                 ->io_context()
                 ->update_scan_bytes_from_remote_storage(
-                        stats.file_cache_stats.bytes_read_from_remote - 
_bytes_read_from_remote);
+                        stats.file_cache_stats.bytes_read_from_remote);
 
         DorisMetrics::instance()->query_scan_bytes_from_local->increment(
-                stats.file_cache_stats.bytes_read_from_local - 
_bytes_read_from_local);
+                stats.file_cache_stats.bytes_read_from_local);
         DorisMetrics::instance()->query_scan_bytes_from_remote->increment(
-                stats.file_cache_stats.bytes_read_from_remote - 
_bytes_read_from_remote);
+                stats.file_cache_stats.bytes_read_from_remote);
+    }
+
+    if (has_file_cache_statistics(stats.file_cache_stats)) {
+        io::FileCacheProfileReporter 
cache_profile(local_state->_segment_profile.get());
+        cache_profile.update(&stats.file_cache_stats);
+        
_state->get_query_ctx()->resource_ctx()->io_context()->update_bytes_write_into_cache(
+                stats.file_cache_stats.bytes_write_into_cache);
     }
 
     _tablet_reader->mutable_stats()->compressed_bytes_read = 0;
     _tablet_reader->mutable_stats()->uncompressed_bytes_read = 0;
     _tablet_reader->mutable_stats()->raw_rows_read = 0;
-
-    _bytes_read_from_local = 
_tablet_reader->stats().file_cache_stats.bytes_read_from_local;
-    _bytes_read_from_remote = 
_tablet_reader->stats().file_cache_stats.bytes_read_from_remote;
+    _tablet_reader->mutable_stats()->file_cache_stats = {};
 }
 
 void OlapScanner::_collect_profile_before_close() {
@@ -855,8 +880,7 @@ void OlapScanner::_collect_profile_before_close() {
     inverted_index_profile.update(local_state->_index_filter_profile.get(),
                                   &stats.inverted_index_stats);
 
-    // only cloud deploy mode will use file cache.
-    if (config::is_cloud_mode() && config::enable_file_cache) {
+    if (has_file_cache_statistics(stats.file_cache_stats)) {
         io::FileCacheProfileReporter 
cache_profile(local_state->_segment_profile.get());
         cache_profile.update(&stats.file_cache_stats);
         
_state->get_query_ctx()->resource_ctx()->io_context()->update_bytes_write_into_cache(
diff --git a/be/src/exec/scan/olap_scanner.h b/be/src/exec/scan/olap_scanner.h
index 71a74262057..e7c9598b864 100644
--- a/be/src/exec/scan/olap_scanner.h
+++ b/be/src/exec/scan/olap_scanner.h
@@ -108,9 +108,6 @@ private:
     TabletReader::ReaderParams _tablet_reader_params;
     std::unique_ptr<TabletReader> _tablet_reader;
 
-    int64_t _bytes_read_from_local = 0;
-    int64_t _bytes_read_from_remote = 0;
-
 public:
     std::vector<ColumnId> _return_columns;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to