This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0.5
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0.5 by this push:
new 460460a55ec [enhancement](statistics) Add segment footer open stats to profile (#50326) (#50416)
460460a55ec is described below
commit 460460a55ec6635346c2f699f72dcea28c3f6d6e
Author: Siyang Tang <[email protected]>
AuthorDate: Fri Apr 25 19:08:21 2025 +0800
[enhancement](statistics) Add segment footer open stats to profile (#50326) (#50416)
### What problem does this PR solve?
pick: #50326
---
be/src/olap/rowset/beta_rowset.cpp | 5 +++--
be/src/olap/rowset/beta_rowset.h | 4 +++-
be/src/olap/rowset/beta_rowset_reader.cpp | 2 +-
be/src/olap/rowset/segment_v2/segment.cpp | 21 +++++++++++----------
be/src/olap/rowset/segment_v2/segment.h | 10 ++++++----
be/src/olap/segment_loader.cpp | 2 +-
6 files changed, 25 insertions(+), 19 deletions(-)
diff --git a/be/src/olap/rowset/beta_rowset.cpp
b/be/src/olap/rowset/beta_rowset.cpp
index 9f33f363e99..df936f5f4ae 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -163,7 +163,8 @@ Status BetaRowset::load_segments(int64_t seg_id_begin,
int64_t seg_id_end,
return Status::OK();
}
-Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment) {
+Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment,
+ OlapReaderStatistics* stats) {
auto fs = _rowset_meta->fs();
if (!fs) {
return Status::Error<INIT_FAILED>("get fs failed");
@@ -181,7 +182,7 @@ Status BetaRowset::load_segment(int64_t seg_id,
segment_v2::SegmentSharedPtr* se
auto s = segment_v2::Segment::open(fs, seg_path,
_rowset_meta->tablet_id(), seg_id, rowset_id(),
_schema, reader_options, segment,
- _rowset_meta->inverted_index_file_info(seg_id));
+ _rowset_meta->inverted_index_file_info(seg_id), stats);
if (!s.ok()) {
LOG(WARNING) << "failed to open segment. " << seg_path << " under
rowset " << rowset_id()
<< " : " << s.to_string();
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index 0b22d122741..32d8f3500a8 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -26,6 +26,7 @@
#include <vector>
#include "common/status.h"
+#include "olap/olap_common.h"
#include "olap/rowset/rowset.h"
#include "olap/rowset/rowset_meta.h"
#include "olap/rowset/rowset_reader.h"
@@ -76,7 +77,8 @@ public:
Status load_segments(int64_t seg_id_begin, int64_t seg_id_end,
std::vector<segment_v2::SegmentSharedPtr>* segments);
- Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment);
+ Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment,
+ OlapReaderStatistics* stats = nullptr);
Status get_segments_size(std::vector<size_t>* segments_size);
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp
b/be/src/olap/rowset/beta_rowset_reader.cpp
index 9a4d71587a0..cd3994ec072 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -231,7 +231,7 @@ Status
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
SCOPED_RAW_TIMER(&_stats->rowset_reader_load_segments_timer_ns);
RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
_rowset, &segment_cache_handle, should_use_cache,
- /*need_load_pk_index_and_bf*/ false));
+ /*need_load_pk_index_and_bf*/ false, _stats));
}
// create iterator for each segment
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 298f4684e84..86884d7adce 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -81,9 +81,9 @@ class InvertedIndexIterator;
Status Segment::open(io::FileSystemSPtr fs, const std::string& path, int64_t
tablet_id,
uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr
tablet_schema,
const io::FileReaderOptions& reader_options,
std::shared_ptr<Segment>* output,
- InvertedIndexFileInfo idx_file_info) {
+ InvertedIndexFileInfo idx_file_info, OlapReaderStatistics* stats) {
auto s = _open(fs, path, segment_id, rowset_id, tablet_schema,
reader_options, output,
- idx_file_info);
+ idx_file_info, stats);
if (!s.ok()) {
if (!config::is_cloud_mode()) {
auto res = ExecEnv::get_tablet(tablet_id);
@@ -101,14 +101,14 @@ Status Segment::open(io::FileSystemSPtr fs, const
std::string& path, int64_t tab
Status Segment::_open(io::FileSystemSPtr fs, const std::string& path, uint32_t
segment_id,
RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
const io::FileReaderOptions& reader_options,
std::shared_ptr<Segment>* output,
- InvertedIndexFileInfo idx_file_info) {
+ InvertedIndexFileInfo idx_file_info, OlapReaderStatistics* stats) {
io::FileReaderSPtr file_reader;
RETURN_IF_ERROR(fs->open_file(path, &file_reader, &reader_options));
std::shared_ptr<Segment> segment(
new Segment(segment_id, rowset_id, std::move(tablet_schema),
idx_file_info));
segment->_fs = fs;
segment->_file_reader = std::move(file_reader);
- auto st = segment->_open();
+ auto st = segment->_open(stats);
TEST_INJECTION_POINT_CALLBACK("Segment::open:corruption", &st);
if (st.is<ErrorCode::CORRUPTION>() &&
reader_options.cache_type == io::FileCachePolicy::FILE_BLOCK_CACHE) {
@@ -121,7 +121,7 @@ Status Segment::_open(io::FileSystemSPtr fs, const
std::string& path, uint32_t s
RETURN_IF_ERROR(fs->open_file(path, &file_reader, &reader_options));
segment->_file_reader = std::move(file_reader);
- st = segment->_open();
+ st = segment->_open(stats);
TEST_INJECTION_POINT_CALLBACK("Segment::open:corruption1", &st);
if (st.is<ErrorCode::CORRUPTION>()) { // corrupt again
LOG(WARNING) << "failed to try to read remote source file again
with cache support,"
@@ -134,7 +134,7 @@ Status Segment::_open(io::FileSystemSPtr fs, const
std::string& path, uint32_t s
opt.cache_type = io::FileCachePolicy::NO_CACHE; // skip cache
RETURN_IF_ERROR(fs->open_file(path, &file_reader, &opt));
segment->_file_reader = std::move(file_reader);
- st = segment->_open();
+ st = segment->_open(stats);
if (!st.ok()) {
LOG(WARNING) << "failed to try to read remote source file
directly,"
<< " file path: " << path
@@ -176,9 +176,9 @@ void Segment::update_metadata_size() {
_tracked_meta_mem_usage = _meta_mem_usage;
}
-Status Segment::_open() {
+Status Segment::_open(OlapReaderStatistics* stats) {
_footer_pb = std::make_unique<SegmentFooterPB>();
- RETURN_IF_ERROR(_parse_footer(_footer_pb.get()));
+ RETURN_IF_ERROR(_parse_footer(_footer_pb.get(), stats));
_pk_index_meta.reset(_footer_pb->has_primary_key_index_meta()
? new
PrimaryKeyIndexMetaPB(_footer_pb->primary_key_index_meta())
: nullptr);
@@ -387,7 +387,7 @@ Status Segment::_write_error_file(size_t file_size, size_t
offset, size_t bytes_
return Status::OK(); // already exists
};
-Status Segment::_parse_footer(SegmentFooterPB* footer) {
+Status Segment::_parse_footer(SegmentFooterPB* footer, OlapReaderStatistics* stats) {
// Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4),
MagicNumber(4)
auto file_size = _file_reader->size();
if (file_size < 12) {
@@ -399,7 +399,8 @@ Status Segment::_parse_footer(SegmentFooterPB* footer) {
uint8_t fixed_buf[12];
size_t bytes_read = 0;
// TODO(plat1ko): Support session variable `enable_file_cache`
- io::IOContext io_ctx {.is_index_data = true};
+ io::IOContext io_ctx {.is_index_data = true,
+ .file_cache_stats = stats ? &stats->file_cache_stats : nullptr};
RETURN_IF_ERROR(
_file_reader->read_at(file_size - 12, Slice(fixed_buf, 12),
&bytes_read, &io_ctx));
DCHECK_EQ(bytes_read, 12);
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index b8478d60cd5..de024b30a99 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -82,7 +82,8 @@ public:
static Status open(io::FileSystemSPtr fs, const std::string& path, int64_t
tablet_id,
uint32_t segment_id, RowsetId rowset_id,
TabletSchemaSPtr tablet_schema,
const io::FileReaderOptions& reader_options,
- std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info = {});
+ std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info = {},
+ OlapReaderStatistics* stats = nullptr);
static io::UInt128Wrapper file_cache_key(std::string_view rowset_id,
uint32_t seg_id);
io::UInt128Wrapper file_cache_key() const {
@@ -220,10 +221,11 @@ private:
static Status _open(io::FileSystemSPtr fs, const std::string& path,
uint32_t segment_id,
RowsetId rowset_id, TabletSchemaSPtr tablet_schema,
const io::FileReaderOptions& reader_options,
- std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info);
+ std::shared_ptr<Segment>* output, InvertedIndexFileInfo idx_file_info,
+ OlapReaderStatistics* stats = nullptr);
// open segment file and read the minimum amount of necessary information
(footer)
- Status _open();
- Status _parse_footer(SegmentFooterPB* footer);
+ Status _open(OlapReaderStatistics* stats);
+ Status _parse_footer(SegmentFooterPB* footer, OlapReaderStatistics* stats = nullptr);
Status _create_column_readers(const SegmentFooterPB& footer);
Status _load_pk_bloom_filter(OlapReaderStatistics* stats);
ColumnReader* _get_column_reader(const TabletColumn& col);
diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp
index 4240f7e250a..56af67f487a 100644
--- a/be/src/olap/segment_loader.cpp
+++ b/be/src/olap/segment_loader.cpp
@@ -71,7 +71,7 @@ Status SegmentLoader::load_segments(const
BetaRowsetSharedPtr& rowset,
}
// If the segment is not healthy, then will create a new segment and
will replace the unhealthy one in SegmentCache.
segment_v2::SegmentSharedPtr segment;
- RETURN_IF_ERROR(rowset->load_segment(i, &segment));
+ RETURN_IF_ERROR(rowset->load_segment(i, &segment, index_load_stats));
if (need_load_pk_index_and_bf) {
RETURN_IF_ERROR(segment->load_pk_index_and_bf(index_load_stats));
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]