This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 74b5ef0980e [fix](read) remove logic of estimating count of rows to read in segment iterator to avoid wrong result of unique key (#29109) (#29110) 74b5ef0980e is described below commit 74b5ef0980e18cd3d0132199db4962955f08a8e7 Author: TengJianPing <18241664+jackte...@users.noreply.github.com> AuthorDate: Wed Dec 27 12:55:26 2023 +0800 [fix](read) remove logic of estimating count of rows to read in segment iterator to avoid wrong result of unique key (#29109) (#29110) --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 24 ---------------------- be/src/olap/rowset/segment_v2/segment_iterator.h | 5 ----- 2 files changed, 29 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 4e3d6cfae9c..44ff03c9671 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -259,8 +259,6 @@ SegmentIterator::SegmentIterator(std::shared_ptr<Segment> segment, SchemaSPtr sc _lazy_materialization_read(false), _lazy_inited(false), _inited(false), - _estimate_row_size(true), - _wait_times_estimate_row_size(10), _pool(new ObjectPool) {} Status SegmentIterator::init(const StorageReadOptions& opts) { @@ -2034,13 +2032,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { _current_batch_rows_read = 0; uint32_t nrows_read_limit = _opts.block_row_max; - if (_wait_times_estimate_row_size > 0) { - // first time, read 100 rows to estimate average row size, to avoid oom caused by a single batch being too large. - // If no valid data is read for the first time, block_row_max is read each time thereafter. - // Avoid low performance when valid data cannot be read all the time - nrows_read_limit = std::min(nrows_read_limit, (uint32_t)100); - _wait_times_estimate_row_size--; - } RETURN_IF_ERROR(_read_columns_by_index( nrows_read_limit, _current_batch_rows_read, _lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval)); @@ -2180,9 +2171,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { // shrink char_type suffix zero data block->shrink_char_type_column_suffix_zero(_char_type_idx); - if (UNLIKELY(_estimate_row_size) && block->rows() > 0) { - _update_max_row(block); - } return Status::OK(); } @@ -2208,10 +2196,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { // shrink char_type suffix zero data block->shrink_char_type_column_suffix_zero(_char_type_idx); - if (UNLIKELY(_estimate_row_size) && block->rows() > 0) { - _update_max_row(block); - } - // reverse block row order if (_opts.read_orderby_key_reverse) { size_t num_rows = block->rows(); @@ -2362,14 +2346,6 @@ void SegmentIterator::_convert_dict_code_for_predicate_if_necessary_impl( } } -void SegmentIterator::_update_max_row(const vectorized::Block* block) { - _estimate_row_size = false; - auto avg_row_size = block->bytes() / block->rows(); - - int block_row_max = config::doris_scan_block_max_mb / avg_row_size; - _opts.block_row_max = std::min(block_row_max, _opts.block_row_max); -} - Status SegmentIterator::current_block_row_locations(std::vector<RowLocation>* block_row_locations) { DCHECK(_opts.record_rowids); DCHECK_GE(_block_rowids.size(), _current_batch_rows_read); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index b84189c106d..375747907fb 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -251,8 +251,6 @@ private: void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* predicate); - void _update_max_row(const vectorized::Block* block); - bool _check_apply_by_bitmap_index(ColumnPredicate* pred); bool _check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound = false); @@ -385,9 +383,6 @@ private: // the actual init process is delayed to the first call to next_batch() bool _lazy_inited; bool _inited; - bool _estimate_row_size; - // Read up to 100 rows at a time while waiting for the estimated row size. - int _wait_times_estimate_row_size; StorageReadOptions _opts; // make a copy of `_opts.column_predicates` in order to make local changes --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org