This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 171404228f [improvement](vertical compaction) cache segment in vertical compaction (#16101) 171404228f is described below commit 171404228f7b80d161c2aad6a3224234512c3c5a Author: yixiutt <102007456+yixi...@users.noreply.github.com> AuthorDate: Fri Jan 20 16:38:23 2023 +0800 [improvement](vertical compaction) cache segment in vertical compaction (#16101) 1. In vertical compaction, segments will be loaded for every column group, so we should cache segment ptr to avoid too many repeated io. 2. Fix vertical compaction data size bug --- be/src/olap/rowset/beta_rowset_reader.cpp | 10 ++++++---- be/src/olap/rowset/beta_rowset_reader.h | 3 ++- be/src/olap/rowset/rowset_reader.h | 3 ++- be/src/olap/rowset/segment_v2/segment_writer.cpp | 3 +++ be/src/vec/olap/vertical_block_reader.cpp | 4 +++- 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 6612fff758..3b45b3cf3f 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -50,7 +50,8 @@ bool BetaRowsetReader::update_profile(RuntimeProfile* profile) { } Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context, - std::vector<RowwiseIterator*>* out_iters) { + std::vector<RowwiseIterator*>* out_iters, + bool use_cache) { RETURN_NOT_OK(_rowset->load()); _context = read_context; if (_context->stats != nullptr) { @@ -163,9 +164,10 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.runtime_state = read_context->runtime_state; // load segments - RETURN_NOT_OK(SegmentLoader::instance()->load_segments( - _rowset, &_segment_cache_handle, - read_context->reader_type == ReaderType::READER_QUERY)); + // use_cache is true when doing vertical compaction + bool should_use_cache = use_cache || read_context->reader_type == ReaderType::READER_QUERY; + 
RETURN_NOT_OK(SegmentLoader::instance()->load_segments(_rowset, &_segment_cache_handle, + should_use_cache)); // create iterator for each segment std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators; diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h index 889a7c2742..dea6814558 100644 --- a/be/src/olap/rowset/beta_rowset_reader.h +++ b/be/src/olap/rowset/beta_rowset_reader.h @@ -34,7 +34,8 @@ public: Status init(RowsetReaderContext* read_context) override; Status get_segment_iterators(RowsetReaderContext* read_context, - std::vector<RowwiseIterator*>* out_iters) override; + std::vector<RowwiseIterator*>* out_iters, + bool use_cache = false) override; void reset_read_options() override; Status next_block(vectorized::Block* block) override; Status next_block_view(vectorized::BlockView* block_view) override; diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h index d1601d379a..4186088fe5 100644 --- a/be/src/olap/rowset/rowset_reader.h +++ b/be/src/olap/rowset/rowset_reader.h @@ -44,7 +44,8 @@ public: virtual Status init(RowsetReaderContext* read_context) = 0; virtual Status get_segment_iterators(RowsetReaderContext* read_context, - std::vector<RowwiseIterator*>* out_iters) = 0; + std::vector<RowwiseIterator*>* out_iters, + bool use_cache = false) = 0; virtual void reset_read_options() = 0; virtual Status next_block(vectorized::Block* block) = 0; diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index ec62749c5c..c3e066db72 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -433,6 +433,9 @@ Status SegmentWriter::finalize_columns_index(uint64_t* index_size) { Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) { RETURN_IF_ERROR(_write_footer()); + // finish + RETURN_IF_ERROR(_file_writer->finalize()); + *segment_file_size = 
_file_writer->bytes_appended(); return Status::OK(); } diff --git a/be/src/vec/olap/vertical_block_reader.cpp b/be/src/vec/olap/vertical_block_reader.cpp index dc7d3b58c6..897d001c0d 100644 --- a/be/src/vec/olap/vertical_block_reader.cpp +++ b/be/src/vec/olap/vertical_block_reader.cpp @@ -55,7 +55,9 @@ Status VerticalBlockReader::_get_segment_iterators(const ReaderParams& read_para _reader_context.is_vertical_compaction = true; for (auto& rs_reader : rs_readers) { // segment iterator will be inited here - RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, segment_iters)); + // In vertical compaction, every group will load segments, so we should cache + // segments to avoid too many S3 HEAD requests + RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, segment_iters, true)); // if segments overlapping, all segment iterator should be inited in // heap merge iterator. If segments are none overlapping, only first segment of this // rowset will be inited and push to heap, other segment will be inited later when current --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org