This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 74b5ef0980e [fix](read) remove logic of estimating count of rows to read in segment iterator to avoid wrong result of unique key (#29109) (#29110)
74b5ef0980e is described below

commit 74b5ef0980e18cd3d0132199db4962955f08a8e7
Author: TengJianPing <18241664+jackte...@users.noreply.github.com>
AuthorDate: Wed Dec 27 12:55:26 2023 +0800

    [fix](read) remove logic of estimating count of rows to read in segment iterator to avoid wrong result of unique key (#29109) (#29110)
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 24 ----------------------
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  5 -----
 2 files changed, 29 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 4e3d6cfae9c..44ff03c9671 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -259,8 +259,6 @@ SegmentIterator::SegmentIterator(std::shared_ptr<Segment> segment, SchemaSPtr sc
           _lazy_materialization_read(false),
           _lazy_inited(false),
           _inited(false),
-          _estimate_row_size(true),
-          _wait_times_estimate_row_size(10),
           _pool(new ObjectPool) {}
 
 Status SegmentIterator::init(const StorageReadOptions& opts) {
@@ -2034,13 +2032,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
 
     _current_batch_rows_read = 0;
     uint32_t nrows_read_limit = _opts.block_row_max;
-    if (_wait_times_estimate_row_size > 0) {
-        // first time, read 100 rows to estimate average row size, to avoid oom caused by a single batch being too large.
-        // If no valid data is read for the first time, block_row_max is read each time thereafter.
-        // Avoid low performance when valid data cannot be read all the time
-        nrows_read_limit = std::min(nrows_read_limit, (uint32_t)100);
-        _wait_times_estimate_row_size--;
-    }
     RETURN_IF_ERROR(_read_columns_by_index(
             nrows_read_limit, _current_batch_rows_read,
             _lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval));
@@ -2180,9 +2171,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
             // shrink char_type suffix zero data
             block->shrink_char_type_column_suffix_zero(_char_type_idx);
 
-            if (UNLIKELY(_estimate_row_size) && block->rows() > 0) {
-                _update_max_row(block);
-            }
             return Status::OK();
         }
 
@@ -2208,10 +2196,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
     // shrink char_type suffix zero data
     block->shrink_char_type_column_suffix_zero(_char_type_idx);
 
-    if (UNLIKELY(_estimate_row_size) && block->rows() > 0) {
-        _update_max_row(block);
-    }
-
     // reverse block row order
     if (_opts.read_orderby_key_reverse) {
         size_t num_rows = block->rows();
@@ -2362,14 +2346,6 @@ void SegmentIterator::_convert_dict_code_for_predicate_if_necessary_impl(
     }
 }
 
-void SegmentIterator::_update_max_row(const vectorized::Block* block) {
-    _estimate_row_size = false;
-    auto avg_row_size = block->bytes() / block->rows();
-
-    int block_row_max = config::doris_scan_block_max_mb / avg_row_size;
-    _opts.block_row_max = std::min(block_row_max, _opts.block_row_max);
-}
-
 Status SegmentIterator::current_block_row_locations(std::vector<RowLocation>* block_row_locations) {
     DCHECK(_opts.record_rowids);
     DCHECK_GE(_block_rowids.size(), _current_batch_rows_read);
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index b84189c106d..375747907fb 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -251,8 +251,6 @@ private:
 
     void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* predicate);
 
-    void _update_max_row(const vectorized::Block* block);
-
     bool _check_apply_by_bitmap_index(ColumnPredicate* pred);
     bool _check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound = false);
 
@@ -385,9 +383,6 @@ private:
     // the actual init process is delayed to the first call to next_batch()
     bool _lazy_inited;
     bool _inited;
-    bool _estimate_row_size;
-    // Read up to 100 rows at a time while waiting for the estimated row size.
-    int _wait_times_estimate_row_size;
 
     StorageReadOptions _opts;
     // make a copy of `_opts.column_predicates` in order to make local changes


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to