This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 9d5af7febd8 [opt](inverted index) Optimization of the initialization
process in topn (#38870)
9d5af7febd8 is described below
commit 9d5af7febd8d27596d0141e1e88eb23aec614a4a
Author: zzzxl <[email protected]>
AuthorDate: Mon Aug 5 18:26:00 2024 +0800
[opt](inverted index) Optimization of the initialization process in topn
(#38870)
Cherry-picked from https://github.com/apache/doris/pull/37722
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 15 +++++++++------
be/src/olap/rowset/segment_v2/segment_iterator.h | 3 ++-
2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e2ce44d4d5b..f1d77893222 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1879,7 +1879,8 @@ Status SegmentIterator::_read_columns(const
std::vector<ColumnId>& column_ids,
}
Status SegmentIterator::_init_current_block(
- vectorized::Block* block, std::vector<vectorized::MutableColumnPtr>&
current_columns) {
+ vectorized::Block* block, std::vector<vectorized::MutableColumnPtr>&
current_columns,
+ uint32_t nrows_read_limit) {
block->clear_column_data(_schema->num_column_ids());
for (size_t i = 0; i < _schema->num_column_ids(); i++) {
@@ -1899,7 +1900,7 @@ Status SegmentIterator::_init_current_block(
column_desc->path() == nullptr ? "" :
column_desc->path()->get_path());
// TODO reuse
current_columns[cid] = file_column_type->create_column();
- current_columns[cid]->reserve(_opts.block_row_max);
+ current_columns[cid]->reserve(nrows_read_limit);
} else {
// the column in block must clear() here to insert new data
if (_is_pred_column[cid] ||
@@ -1918,7 +1919,7 @@ Status SegmentIterator::_init_current_block(
} else if (column_desc->type() ==
FieldType::OLAP_FIELD_TYPE_DATETIME) {
current_columns[cid]->set_datetime_type();
}
- current_columns[cid]->reserve(_opts.block_row_max);
+ current_columns[cid]->reserve(nrows_read_limit);
}
}
}
@@ -2332,14 +2333,16 @@ Status
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
}
}
}
- RETURN_IF_ERROR(_init_current_block(block, _current_return_columns));
- _converted_column_ids.assign(_schema->columns().size(), 0);
- _current_batch_rows_read = 0;
uint32_t nrows_read_limit = _opts.block_row_max;
if (_can_opt_topn_reads()) {
nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit),
nrows_read_limit);
}
+
+ RETURN_IF_ERROR(_init_current_block(block, _current_return_columns,
nrows_read_limit));
+ _converted_column_ids.assign(_schema->columns().size(), 0);
+
+ _current_batch_rows_read = 0;
RETURN_IF_ERROR(_read_columns_by_index(
nrows_read_limit, _current_batch_rows_read,
_lazy_materialization_read || _opts.record_rowids ||
_is_need_expr_eval));
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 6383a9435e8..a062216ab0b 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -222,7 +222,8 @@ private:
bool set_block_rowid);
void _replace_version_col(size_t num_rows);
Status _init_current_block(vectorized::Block* block,
- std::vector<vectorized::MutableColumnPtr>&
non_pred_vector);
+ std::vector<vectorized::MutableColumnPtr>&
non_pred_vector,
+ uint32_t nrows_read_limit);
uint16_t _evaluate_vectorization_predicate(uint16_t* sel_rowid_idx,
uint16_t selected_size);
uint16_t _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx,
uint16_t selected_size);
void _output_non_pred_columns(vectorized::Block* block);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]