This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new a986c181c2 [improvement](vertical compaction) cache segment in 
vertical compaction (#16101)
a986c181c2 is described below

commit a986c181c2490fb405075a8bbee8fd7c1f2dd54f
Author: yixiutt <102007456+yixi...@users.noreply.github.com>
AuthorDate: Fri Jan 20 16:38:23 2023 +0800

    [improvement](vertical compaction) cache segment in vertical compaction 
(#16101)
    
    1. In vertical compaction, segments are loaded once for every column group, so
    we should cache the segment pointers to avoid excessive repeated I/O.
    2. Fix a data size bug in vertical compaction.
---
 be/src/olap/rowset/beta_rowset_reader.cpp        | 10 ++++++----
 be/src/olap/rowset/beta_rowset_reader.h          |  3 ++-
 be/src/olap/rowset/rowset_reader.h               |  3 ++-
 be/src/olap/rowset/segment_v2/segment_writer.cpp |  1 +
 be/src/vec/olap/vertical_block_reader.cpp        |  4 +++-
 5 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp 
b/be/src/olap/rowset/beta_rowset_reader.cpp
index eae3f403d1..8fb0fca4a9 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -45,7 +45,8 @@ void BetaRowsetReader::reset_read_options() {
 }
 
 Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* 
read_context,
-                                               std::vector<RowwiseIterator*>* 
out_iters) {
+                                               std::vector<RowwiseIterator*>* 
out_iters,
+                                               bool use_cache) {
     RETURN_NOT_OK(_rowset->load());
     _context = read_context;
     if (_context->stats != nullptr) {
@@ -162,9 +163,10 @@ Status 
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
     _read_options.io_ctx.reader_type = read_context->reader_type;
 
     // load segments
-    RETURN_NOT_OK(SegmentLoader::instance()->load_segments(
-            _rowset, &_segment_cache_handle,
-            read_context->reader_type == ReaderType::READER_QUERY));
+    // use_cache is true when doing vertical compaction
+    bool should_use_cache = use_cache || read_context->reader_type == 
ReaderType::READER_QUERY;
+    RETURN_NOT_OK(SegmentLoader::instance()->load_segments(_rowset, 
&_segment_cache_handle,
+                                                           should_use_cache));
 
     // create iterator for each segment
     std::vector<std::unique_ptr<RowwiseIterator>> seg_iterators;
diff --git a/be/src/olap/rowset/beta_rowset_reader.h 
b/be/src/olap/rowset/beta_rowset_reader.h
index e2f888cb12..d09cf5445f 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -36,7 +36,8 @@ public:
     Status init(RowsetReaderContext* read_context) override;
 
     Status get_segment_iterators(RowsetReaderContext* read_context,
-                                 std::vector<RowwiseIterator*>* out_iters) 
override;
+                                 std::vector<RowwiseIterator*>* out_iters,
+                                 bool use_cache = false) override;
     void reset_read_options() override;
 
     // It's ok, because we only get ref here, the block's owner is this reader.
diff --git a/be/src/olap/rowset/rowset_reader.h 
b/be/src/olap/rowset/rowset_reader.h
index a189ef73dc..98c423119a 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -45,7 +45,8 @@ public:
     virtual Status init(RowsetReaderContext* read_context) = 0;
 
     virtual Status get_segment_iterators(RowsetReaderContext* read_context,
-                                         std::vector<RowwiseIterator*>* 
out_iters) = 0;
+                                         std::vector<RowwiseIterator*>* 
out_iters,
+                                         bool use_cache = false) = 0;
     virtual void reset_read_options() = 0;
 
     // read next block data into *block.
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 1efe66f97a..025bfed484 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -382,6 +382,7 @@ Status SegmentWriter::finalize_columns(uint64_t* 
index_size) {
 
 Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) {
     RETURN_IF_ERROR(_write_footer());
+    // finish
     RETURN_IF_ERROR(_file_writer->finalize());
     *segment_file_size = _file_writer->bytes_appended();
     return Status::OK();
diff --git a/be/src/vec/olap/vertical_block_reader.cpp 
b/be/src/vec/olap/vertical_block_reader.cpp
index 1a1271b047..42abc8a898 100644
--- a/be/src/vec/olap/vertical_block_reader.cpp
+++ b/be/src/vec/olap/vertical_block_reader.cpp
@@ -53,7 +53,9 @@ Status VerticalBlockReader::_get_segment_iterators(const 
ReaderParams& read_para
     _reader_context.is_vertical_compaction = true;
     for (auto& rs_reader : rs_readers) {
         // segment iterator will be inited here
-        RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, 
segment_iters));
+        // In vertical compaction, every column group loads the segments, so we 
should cache
+        // the segments to avoid too many S3 HEAD requests
+        RETURN_NOT_OK(rs_reader->get_segment_iterators(&_reader_context, 
segment_iters, true));
         // if segments overlapping, all segment iterator should be inited in
         // heap merge iterator. If segments are none overlapping, only first 
segment of this
         // rowset will be inited and push to heap, other segment will be 
inited later when current


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to