This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b890338f88 [fix](cache stat) add builder stat to scanner (#56165)
1b890338f88 is described below

commit 1b890338f88d641f7d2b2281ebed529849ce47ec
Author: Yongqiang YANG <[email protected]>
AuthorDate: Thu Sep 18 22:35:48 2025 +0800

    [fix](cache stat) add builder stat to scanner (#56165)
---
 be/src/olap/parallel_scanner_builder.cpp    |  2 +-
 be/src/olap/parallel_scanner_builder.h      |  3 +++
 be/src/olap/rowset/beta_rowset.cpp          |  7 ++++---
 be/src/olap/rowset/beta_rowset.h            |  3 ++-
 be/src/olap/rowset/beta_rowset_reader.cpp   |  2 +-
 be/src/pipeline/exec/olap_scan_operator.cpp | 11 +++++++++++
 be/test/olap/rowid_conversion_test.cpp      |  3 ++-
 be/test/olap/segcompaction_mow_test.cpp     |  3 ++-
 be/test/olap/segcompaction_test.cpp         |  9 ++++++---
 9 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/be/src/olap/parallel_scanner_builder.cpp b/be/src/olap/parallel_scanner_builder.cpp
index 8514b96f09e..6b3940bcb87 100644
--- a/be/src/olap/parallel_scanner_builder.cpp
+++ b/be/src/olap/parallel_scanner_builder.cpp
@@ -226,7 +226,7 @@ Status ParallelScannerBuilder::_load() {
 
             auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(rowset);
             std::vector<uint32_t> segment_rows;
-            RETURN_IF_ERROR(beta_rowset->get_segment_num_rows(&segment_rows));
+            RETURN_IF_ERROR(beta_rowset->get_segment_num_rows(&segment_rows, &_builder_stats));
             auto segment_count = rowset->num_segments();
             for (int64_t i = 0; i != segment_count; i++) {
                 _all_segments_rows[rowset_id].emplace_back(segment_rows[i]);
diff --git a/be/src/olap/parallel_scanner_builder.h b/be/src/olap/parallel_scanner_builder.h
index 225c27e66d7..35839884757 100644
--- a/be/src/olap/parallel_scanner_builder.h
+++ b/be/src/olap/parallel_scanner_builder.h
@@ -66,6 +66,8 @@ public:
 
     void set_optimize_index_scan_parallelism(bool v) { _optimize_index_scan_parallelism = v; }
 
+    const OlapReaderStatistics* builder_stats() const { return &_builder_stats; }
+
 private:
     Status _load();
 
@@ -96,6 +98,7 @@ private:
     bool _optimize_index_scan_parallelism {false};
 
     std::shared_ptr<RuntimeProfile> _scanner_profile;
+    OlapReaderStatistics _builder_stats;
     RuntimeState* _state;
     int64_t _limit;
     bool _is_dup_mow_key;
diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp
index 0269346df87..3032e0b7cbc 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -71,19 +71,20 @@ Status BetaRowset::init() {
     return Status::OK(); // no op
 }
 
-Status BetaRowset::get_segment_num_rows(std::vector<uint32_t>* segment_rows) {
+Status BetaRowset::get_segment_num_rows(std::vector<uint32_t>* segment_rows,
+                                        OlapReaderStatistics* read_stats) {
     // `ROWSET_UNLOADING` is state for closed() called but owned by some readers.
     // So here `ROWSET_UNLOADING` is allowed.
     DCHECK_NE(_rowset_state_machine.rowset_state(), ROWSET_UNLOADED);
 
-    RETURN_IF_ERROR(_load_segment_rows_once.call([this] {
+    RETURN_IF_ERROR(_load_segment_rows_once.call([this, read_stats] {
         auto segment_count = num_segments();
         _segments_rows.resize(segment_count);
         for (int64_t i = 0; i != segment_count; ++i) {
             SegmentCacheHandle segment_cache_handle;
             RETURN_IF_ERROR(SegmentLoader::instance()->load_segment(
                     std::static_pointer_cast<BetaRowset>(shared_from_this()), i,
-                    &segment_cache_handle, false, false));
+                    &segment_cache_handle, false, false, read_stats));
             const auto& tmp_segments = segment_cache_handle.get_segments();
             _segments_rows[i] = tmp_segments[0]->num_rows();
         }
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index f3a8230383d..f0cd5c966cd 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -91,7 +91,8 @@ public:
     Status show_nested_index_file(rapidjson::Value* rowset_value,
                                   rapidjson::Document::AllocatorType& allocator);
 
-    Status get_segment_num_rows(std::vector<uint32_t>* segment_rows);
+    Status get_segment_num_rows(std::vector<uint32_t>* segment_rows,
+                                OlapReaderStatistics* read_stats);
 
 protected:
     BetaRowset(const TabletSchemaSPtr& schema, const RowsetMetaSharedPtr& rowset_meta,
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index 0d2dccf5063..2a2eea96ceb 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -246,7 +246,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
     if (_read_context->record_rowids && _read_context->rowid_conversion) {
         // init segment rowid map for rowid conversion
         std::vector<uint32_t> segment_rows;
-        RETURN_IF_ERROR(_rowset->get_segment_num_rows(&segment_rows));
+        RETURN_IF_ERROR(_rowset->get_segment_num_rows(&segment_rows, _stats));
         RETURN_IF_ERROR(_read_context->rowid_conversion->init_segment_map(rowset()->rowset_id(),
                                                                           segment_rows));
     }
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp
index 1a33a3a7e77..16870de81a9 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -27,6 +27,7 @@
 #include "cloud/cloud_tablet.h"
 #include "cloud/cloud_tablet_hotspot.h"
 #include "cloud/config.h"
+#include "io/cache/block_file_cache_profile.h"
 #include "olap/parallel_scanner_builder.h"
 #include "olap/rowset/segment_v2/ann_index/ann_topn_runtime.h"
 #include "olap/storage_engine.h"
@@ -87,6 +88,7 @@ Status OlapScanLocalState::_init_profile() {
     // Rows read from storage.
     // Include the rows read from doris page cache.
     _scan_rows = ADD_COUNTER(custom_profile(), "ScanRows", TUnit::UNIT);
+
     // 1. init segment profile
     _segment_profile.reset(new RuntimeProfile("SegmentIterator"));
     _scanner_profile->add_child(_segment_profile.get(), true, nullptr);
@@ -507,6 +509,15 @@ Status OlapScanLocalState::_init_scanners(std::list<vectorized::ScannerSPtr>* sc
             RETURN_IF_ERROR(olap_scanner->init(state(), _conjuncts));
         }
 
+        const OlapReaderStatistics* stats = scanner_builder.builder_stats();
+        io::FileCacheProfileReporter cache_profile(_segment_profile.get());
+        cache_profile.update(&stats->file_cache_stats);
+
+        DorisMetrics::instance()->query_scan_bytes_from_local->increment(
+                stats->file_cache_stats.bytes_read_from_local);
+        DorisMetrics::instance()->query_scan_bytes_from_remote->increment(
+                stats->file_cache_stats.bytes_read_from_remote);
+
         return Status::OK();
     }
 
diff --git a/be/test/olap/rowid_conversion_test.cpp b/be/test/olap/rowid_conversion_test.cpp
index 54261f365ea..a07f1c4a0ab 100644
--- a/be/test/olap/rowid_conversion_test.cpp
+++ b/be/test/olap/rowid_conversion_test.cpp
@@ -383,7 +383,8 @@ protected:
         EXPECT_EQ(out_rowset->rowset_meta()->num_rows(), output_data.size());
         auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(out_rowset);
         std::vector<uint32_t> segment_num_rows;
-        EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows).ok());
+        OlapReaderStatistics statistics;
+        EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows, &statistics).ok());
         if (has_delete_handler) {
             // All keys less than 1000 are deleted by delete handler
             for (auto& item : output_data) {
diff --git a/be/test/olap/segcompaction_mow_test.cpp b/be/test/olap/segcompaction_mow_test.cpp
index aeb51508206..2a4302b6269 100644
--- a/be/test/olap/segcompaction_mow_test.cpp
+++ b/be/test/olap/segcompaction_mow_test.cpp
@@ -281,7 +281,8 @@ protected:
             EXPECT_EQ(num_rows_read, expect_total_rows - rows_mark_deleted);
             auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(rowset);
             std::vector<uint32_t> segment_num_rows;
-            EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows).ok());
+            OlapReaderStatistics stats;
+            EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows, &stats).ok());
             size_t total_num_rows = 0;
             for (const auto& i : segment_num_rows) {
                 total_num_rows += i;
diff --git a/be/test/olap/segcompaction_test.cpp b/be/test/olap/segcompaction_test.cpp
index 4a1dfc63fd9..065d2119060 100644
--- a/be/test/olap/segcompaction_test.cpp
+++ b/be/test/olap/segcompaction_test.cpp
@@ -388,7 +388,8 @@ TEST_F(SegCompactionTest, SegCompactionThenRead) {
             EXPECT_EQ(num_rows_read, num_segments * rows_per_segment);
             auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(rowset);
             std::vector<uint32_t> segment_num_rows;
-            EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows).ok());
+            OlapReaderStatistics stats;
+            EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows, &stats).ok());
             size_t total_num_rows = 0;
             for (const auto& i : segment_num_rows) {
                 total_num_rows += i;
@@ -894,7 +895,8 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) {
             EXPECT_GE(rowset->rowset_meta()->num_rows(), num_rows_read);
             auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(rowset);
             std::vector<uint32_t> segment_num_rows;
-            EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows).ok());
+            OlapReaderStatistics stats;
+            EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows, &stats).ok());
             size_t total_num_rows = 0;
             for (const auto& i : segment_num_rows) {
                 total_num_rows += i;
@@ -1159,7 +1161,8 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) {
             EXPECT_GE(rowset->rowset_meta()->num_rows(), num_rows_read);
             auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(rowset);
             std::vector<uint32_t> segment_num_rows;
-            EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows).ok());
+            OlapReaderStatistics stats;
+            EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows, &stats).ok());
             size_t total_num_rows = 0;
             for (const auto& i : segment_num_rows) {
                 total_num_rows += i;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to