This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1b890338f88 [fix](cache stat) add builder stat to scanner (#56165)
1b890338f88 is described below
commit 1b890338f88d641f7d2b2281ebed529849ce47ec
Author: Yongqiang YANG <[email protected]>
AuthorDate: Thu Sep 18 22:35:48 2025 +0800
[fix](cache stat) add builder stat to scanner (#56165)
---
be/src/olap/parallel_scanner_builder.cpp | 2 +-
be/src/olap/parallel_scanner_builder.h | 3 +++
be/src/olap/rowset/beta_rowset.cpp | 7 ++++---
be/src/olap/rowset/beta_rowset.h | 3 ++-
be/src/olap/rowset/beta_rowset_reader.cpp | 2 +-
be/src/pipeline/exec/olap_scan_operator.cpp | 11 +++++++++++
be/test/olap/rowid_conversion_test.cpp | 3 ++-
be/test/olap/segcompaction_mow_test.cpp | 3 ++-
be/test/olap/segcompaction_test.cpp | 9 ++++++---
9 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/be/src/olap/parallel_scanner_builder.cpp b/be/src/olap/parallel_scanner_builder.cpp
index 8514b96f09e..6b3940bcb87 100644
--- a/be/src/olap/parallel_scanner_builder.cpp
+++ b/be/src/olap/parallel_scanner_builder.cpp
@@ -226,7 +226,7 @@ Status ParallelScannerBuilder::_load() {
auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(rowset);
std::vector<uint32_t> segment_rows;
- RETURN_IF_ERROR(beta_rowset->get_segment_num_rows(&segment_rows));
+ RETURN_IF_ERROR(beta_rowset->get_segment_num_rows(&segment_rows, &_builder_stats));
auto segment_count = rowset->num_segments();
for (int64_t i = 0; i != segment_count; i++) {
_all_segments_rows[rowset_id].emplace_back(segment_rows[i]);
diff --git a/be/src/olap/parallel_scanner_builder.h b/be/src/olap/parallel_scanner_builder.h
index 225c27e66d7..35839884757 100644
--- a/be/src/olap/parallel_scanner_builder.h
+++ b/be/src/olap/parallel_scanner_builder.h
@@ -66,6 +66,8 @@ public:
void set_optimize_index_scan_parallelism(bool v) { _optimize_index_scan_parallelism = v; }
+ const OlapReaderStatistics* builder_stats() const { return &_builder_stats; }
+
private:
Status _load();
@@ -96,6 +98,7 @@ private:
bool _optimize_index_scan_parallelism {false};
std::shared_ptr<RuntimeProfile> _scanner_profile;
+ OlapReaderStatistics _builder_stats;
RuntimeState* _state;
int64_t _limit;
bool _is_dup_mow_key;
diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp
index 0269346df87..3032e0b7cbc 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -71,19 +71,20 @@ Status BetaRowset::init() {
return Status::OK(); // no op
}
-Status BetaRowset::get_segment_num_rows(std::vector<uint32_t>* segment_rows) {
+Status BetaRowset::get_segment_num_rows(std::vector<uint32_t>* segment_rows,
+ OlapReaderStatistics* read_stats) {
// `ROWSET_UNLOADING` is state for closed() called but owned by some readers.
// So here `ROWSET_UNLOADING` is allowed.
DCHECK_NE(_rowset_state_machine.rowset_state(), ROWSET_UNLOADED);
- RETURN_IF_ERROR(_load_segment_rows_once.call([this] {
+ RETURN_IF_ERROR(_load_segment_rows_once.call([this, read_stats] {
auto segment_count = num_segments();
_segments_rows.resize(segment_count);
for (int64_t i = 0; i != segment_count; ++i) {
SegmentCacheHandle segment_cache_handle;
RETURN_IF_ERROR(SegmentLoader::instance()->load_segment(
std::static_pointer_cast<BetaRowset>(shared_from_this()), i,
- &segment_cache_handle, false, false));
+ &segment_cache_handle, false, false, read_stats));
const auto& tmp_segments = segment_cache_handle.get_segments();
_segments_rows[i] = tmp_segments[0]->num_rows();
}
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index f3a8230383d..f0cd5c966cd 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -91,7 +91,8 @@ public:
Status show_nested_index_file(rapidjson::Value* rowset_value,
rapidjson::Document::AllocatorType& allocator);
- Status get_segment_num_rows(std::vector<uint32_t>* segment_rows);
+ Status get_segment_num_rows(std::vector<uint32_t>* segment_rows,
+ OlapReaderStatistics* read_stats);
protected:
BetaRowset(const TabletSchemaSPtr& schema, const RowsetMetaSharedPtr& rowset_meta,
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index 0d2dccf5063..2a2eea96ceb 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -246,7 +246,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
if (_read_context->record_rowids && _read_context->rowid_conversion) {
// init segment rowid map for rowid conversion
std::vector<uint32_t> segment_rows;
- RETURN_IF_ERROR(_rowset->get_segment_num_rows(&segment_rows));
+ RETURN_IF_ERROR(_rowset->get_segment_num_rows(&segment_rows, _stats));
RETURN_IF_ERROR(_read_context->rowid_conversion->init_segment_map(rowset()->rowset_id(),
segment_rows));
}
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp
index 1a33a3a7e77..16870de81a9 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -27,6 +27,7 @@
#include "cloud/cloud_tablet.h"
#include "cloud/cloud_tablet_hotspot.h"
#include "cloud/config.h"
+#include "io/cache/block_file_cache_profile.h"
#include "olap/parallel_scanner_builder.h"
#include "olap/rowset/segment_v2/ann_index/ann_topn_runtime.h"
#include "olap/storage_engine.h"
@@ -87,6 +88,7 @@ Status OlapScanLocalState::_init_profile() {
// Rows read from storage.
// Include the rows read from doris page cache.
_scan_rows = ADD_COUNTER(custom_profile(), "ScanRows", TUnit::UNIT);
+
// 1. init segment profile
_segment_profile.reset(new RuntimeProfile("SegmentIterator"));
_scanner_profile->add_child(_segment_profile.get(), true, nullptr);
@@ -507,6 +509,15 @@ Status OlapScanLocalState::_init_scanners(std::list<vectorized::ScannerSPtr>* sc
RETURN_IF_ERROR(olap_scanner->init(state(), _conjuncts));
}
+ const OlapReaderStatistics* stats = scanner_builder.builder_stats();
+ io::FileCacheProfileReporter cache_profile(_segment_profile.get());
+ cache_profile.update(&stats->file_cache_stats);
+
+ DorisMetrics::instance()->query_scan_bytes_from_local->increment(
+ stats->file_cache_stats.bytes_read_from_local);
+ DorisMetrics::instance()->query_scan_bytes_from_remote->increment(
+ stats->file_cache_stats.bytes_read_from_remote);
+
return Status::OK();
}
diff --git a/be/test/olap/rowid_conversion_test.cpp b/be/test/olap/rowid_conversion_test.cpp
index 54261f365ea..a07f1c4a0ab 100644
--- a/be/test/olap/rowid_conversion_test.cpp
+++ b/be/test/olap/rowid_conversion_test.cpp
@@ -383,7 +383,8 @@ protected:
EXPECT_EQ(out_rowset->rowset_meta()->num_rows(), output_data.size());
auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(out_rowset);
std::vector<uint32_t> segment_num_rows;
- EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows).ok());
+ OlapReaderStatistics statistics;
+ EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows, &statistics).ok());
if (has_delete_handler) {
// All keys less than 1000 are deleted by delete handler
for (auto& item : output_data) {
diff --git a/be/test/olap/segcompaction_mow_test.cpp b/be/test/olap/segcompaction_mow_test.cpp
index aeb51508206..2a4302b6269 100644
--- a/be/test/olap/segcompaction_mow_test.cpp
+++ b/be/test/olap/segcompaction_mow_test.cpp
@@ -281,7 +281,8 @@ protected:
EXPECT_EQ(num_rows_read, expect_total_rows - rows_mark_deleted);
auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(rowset);
std::vector<uint32_t> segment_num_rows;
- EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows).ok());
+ OlapReaderStatistics stats;
+ EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows, &stats).ok());
size_t total_num_rows = 0;
for (const auto& i : segment_num_rows) {
total_num_rows += i;
diff --git a/be/test/olap/segcompaction_test.cpp b/be/test/olap/segcompaction_test.cpp
index 4a1dfc63fd9..065d2119060 100644
--- a/be/test/olap/segcompaction_test.cpp
+++ b/be/test/olap/segcompaction_test.cpp
@@ -388,7 +388,8 @@ TEST_F(SegCompactionTest, SegCompactionThenRead) {
EXPECT_EQ(num_rows_read, num_segments * rows_per_segment);
auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(rowset);
std::vector<uint32_t> segment_num_rows;
- EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows).ok());
+ OlapReaderStatistics stats;
+ EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows, &stats).ok());
size_t total_num_rows = 0;
for (const auto& i : segment_num_rows) {
total_num_rows += i;
@@ -894,7 +895,8 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) {
EXPECT_GE(rowset->rowset_meta()->num_rows(), num_rows_read);
auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(rowset);
std::vector<uint32_t> segment_num_rows;
- EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows).ok());
+ OlapReaderStatistics stats;
+ EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows, &stats).ok());
size_t total_num_rows = 0;
for (const auto& i : segment_num_rows) {
total_num_rows += i;
@@ -1159,7 +1161,8 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) {
EXPECT_GE(rowset->rowset_meta()->num_rows(), num_rows_read);
auto beta_rowset = std::dynamic_pointer_cast<BetaRowset>(rowset);
std::vector<uint32_t> segment_num_rows;
- EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows).ok());
+ OlapReaderStatistics stats;
+ EXPECT_TRUE(beta_rowset->get_segment_num_rows(&segment_num_rows, &stats).ok());
size_t total_num_rows = 0;
for (const auto& i : segment_num_rows) {
total_num_rows += i;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]