This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push: new a986c181c2 [improvement](vertical compaction) cache segment in vertical compaction (#16101) a986c181c2 is described below commit a986c181c2490fb405075a8bbee8fd7c1f2dd54f Author: yixiutt <102007456+yixi...@users.noreply.github.com> AuthorDate: Fri Jan 20 16:38:23 2023 +0800 [improvement](vertical compaction) cache segment in vertical compaction (#16101) 1. In vertical compaction, segments are loaded once for every column group, so we should cache the segment pointers to avoid too many repeated I/O operations. 2. Fix the vertical compaction data size bug. --- be/src/olap/rowset/beta_rowset_reader.cpp | 10 ++++++---- be/src/olap/rowset/beta_rowset_reader.h | 3 ++- be/src/olap/rowset/rowset_reader.h | 3 ++- be/src/olap/rowset/segment_v2/segment_writer.cpp | 1 + be/src/vec/olap/vertical_block_reader.cpp | 4 +++- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index eae3f403d1..8fb0fca4a9 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -45,7 +45,8 @@ void BetaRowsetReader::reset_read_options() { } Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context, - std::vector<RowwiseIterator*>* out_iters) { + std::vector<RowwiseIterator*>* out_iters, + bool use_cache) { RETURN_NOT_OK(_rowset->load()); _context = read_context; if (_context->stats != nullptr) { @@ -162,9 +163,10 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.io_ctx.reader_type = read_context->reader_type; // load segments - RETURN_NOT_OK(SegmentLoader::instance()->load_segments( - _rowset, &_segment_cache_handle, - read_context->reader_type == ReaderType::READER_QUERY)); + // use_cache is true when doing vertical compaction + bool should_use_cache = use_cache || read_context->reader_type == ReaderType::READER_QUERY; + 
RETURN_NOT_OK(SegmentLoader::instance()->load_segments(_rowset, &_segment_cache_handle, + should_use_cache)); // create iterator for each segment std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators; diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h index e2f888cb12..d09cf5445f 100644 --- a/be/src/olap/rowset/beta_rowset_reader.h +++ b/be/src/olap/rowset/beta_rowset_reader.h @@ -36,7 +36,8 @@ public: Status init(RowsetReaderContext* read_context) override; Status get_segment_iterators(RowsetReaderContext* read_context, - std::vector<RowwiseIterator*>* out_iters) override; + std::vector<RowwiseIterator*>* out_iters, + bool use_cache = false) override; void reset_read_options() override; // It's ok, because we only get ref here, the block's owner is this reader. diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h index a189ef73dc..98c423119a 100644 --- a/be/src/olap/rowset/rowset_reader.h +++ b/be/src/olap/rowset/rowset_reader.h @@ -45,7 +45,8 @@ public: virtual Status init(RowsetReaderContext* read_context) = 0; virtual Status get_segment_iterators(RowsetReaderContext* read_context, - std::vector<RowwiseIterator*>* out_iters) = 0; + std::vector<RowwiseIterator*>* out_iters, + bool use_cache = false) = 0; virtual void reset_read_options() = 0; // read next block data into *block. 
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 1efe66f97a..025bfed484 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -382,6 +382,7 @@ Status SegmentWriter::finalize_columns(uint64_t* index_size) { Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) { RETURN_IF_ERROR(_write_footer()); + // finish RETURN_IF_ERROR(_file_writer->finalize()); *segment_file_size = _file_writer->bytes_appended(); return Status::OK(); diff --git a/be/src/vec/olap/vertical_block_reader.cpp b/be/src/vec/olap/vertical_block_reader.cpp index 1a1271b047..42abc8a898 100644 --- a/be/src/vec/olap/vertical_block_reader.cpp +++ b/be/src/vec/olap/vertical_block_reader.cpp @@ -53,7 +53,9 @@ Status VerticalBlockReader::_get_segment_iterators(const ReaderParams& read_para _reader_context.is_vertical_compaction = true; for (auto& rs_reader : rs_readers) { // segment iterator will be inited here - RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, segment_iters)); + // In vertical compaction, every column group will load the segments, so we should cache + // the segments to avoid too many S3 HEAD requests + RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, segment_iters, true)); // if segments overlapping, all segment iterator should be inited in // heap merge iterator. If segments are none overlapping, only first segment of this // rowset will be inited and push to heap, other segment will be inited later when current --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org