mrhhsg commented on code in PR #59263:
URL: https://github.com/apache/doris/pull/59263#discussion_r3319070648
##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -997,8 +1035,22 @@ Status MapFileColumnIterator::next_batch(size_t* n,
MutableColumnPtr& dst, bool*
auto& column_map = assert_cast<ColumnMap&, TypeCheckOnRelease::DISABLE>(
dst->is_nullable() ?
static_cast<ColumnNullable&>(*dst).get_nested_column() : *dst);
- auto column_offsets_ptr =
IColumn::mutate(std::move(column_map.get_offsets_ptr()));
- Defer defer_offsets {[&] { column_map.get_offsets_ptr() =
std::move(column_offsets_ptr); }};
+ const bool read_meta_columns = need_to_read_meta_columns();
+ MutableColumnPtr column_offsets_ptr;
+ if (read_meta_columns) {
+ column_offsets_ptr =
IColumn::mutate(std::move(column_map.get_offsets_ptr()));
+ } else {
+ const auto base_offset =
Review Comment:
这里没有直接复用 `dst` offsets:`dst` offsets 只描述 predicate/filter 后的输出形态,不包含当前
source key/value ordinal;lazy 阶段补读子列时仍需要从 storage offsets 计算要 seek 的 source
element range。代码里已补充注释说明这个原因。
##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -1081,39 +1135,70 @@ Status MapFileColumnIterator::read_by_rowids(const
rowid_t* rowids, const size_t
if (count == 0) {
return Status::OK();
}
+
// resolve ColumnMap and nullable wrapper
auto& column_map = assert_cast<ColumnMap&, TypeCheckOnRelease::DISABLE>(
dst->is_nullable() ?
static_cast<ColumnNullable&>(*dst).get_nested_column() : *dst);
- auto offsets_ptr =
IColumn::mutate(std::move(column_map.get_offsets_ptr()));
- Defer defer_offsets {[&] { column_map.get_offsets_ptr() =
std::move(offsets_ptr); }};
+ const bool read_meta_columns = need_to_read_meta_columns();
+ MutableColumnPtr offsets_ptr;
+ if (read_meta_columns) {
+ offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr()));
+ } else {
+ const auto base_offset =
+ column_map.get_offsets().empty() ? 0 :
column_map.get_offsets().back();
+ offsets_ptr = ColumnMap::COffsets::create();
+ assert_cast<ColumnOffset64&, TypeCheckOnRelease::DISABLE>(*offsets_ptr)
+ .insert_value(base_offset);
+ }
+ Defer defer_offsets {[&] {
+ if (read_meta_columns) {
+ column_map.get_offsets_ptr() = std::move(offsets_ptr);
+ }
+ }};
auto& offsets = static_cast<ColumnArray::ColumnOffsets&>(*offsets_ptr);
size_t base = offsets.get_data().empty() ? 0 : offsets.get_data().back();
// 1. bulk read null-map if nullable
std::vector<uint8_t> null_mask; // 0: not null, 1: null
- if (_map_reader->is_nullable()) {
- // For nullable map columns, the destination column must also be
nullable.
+ if (read_meta_columns) {
+ if (_map_reader->is_nullable()) {
+ // For nullable map columns, the destination column must also be
nullable.
+ if (UNLIKELY(!dst->is_nullable())) {
+ return Status::InternalError(
+ "unexpected non-nullable destination column for
nullable map reader");
+ }
+ MutableColumnPtr null_map_ptr =
+
static_cast<ColumnNullable&>(*dst).get_null_map_column_ptr();
+ size_t null_before = null_map_ptr->size();
+ RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count,
null_map_ptr));
+ // extract a light-weight view to decide element reads
+ auto& null_map_col = assert_cast<ColumnUInt8&>(*null_map_ptr);
+ const auto* src = null_map_col.get_data().data() + null_before;
+ null_mask.assign(src, src + count);
+ } else if (dst->is_nullable()) {
+ // in not-null to null linked-schemachange mode,
+ // we do not actually rewrite the data, including the meta in the footer,
+ // so dst may come from the changed meta (nullable) while the old data is
+ // not nullable; if so, we fill null_map with 0 (not null) by default
+ MutableColumnPtr null_map_ptr =
+
static_cast<ColumnNullable&>(*dst).get_null_map_column_ptr();
+ auto& null_map = assert_cast<ColumnUInt8&>(*null_map_ptr);
+ null_map.insert_many_vals(0, count);
+ }
+ } else if (_map_reader->is_nullable()) {
+ // In lazy mode the parent null map has already been materialized
during predicate read.
+ // Read a temporary null map here only to compute child element ranges
correctly.
if (UNLIKELY(!dst->is_nullable())) {
return Status::InternalError(
"unexpected non-nullable destination column for nullable
map reader");
}
- auto null_map_ptr =
static_cast<ColumnNullable&>(*dst).get_null_map_column_ptr();
- size_t null_before = null_map_ptr->size();
- auto* null_map_col = null_map_ptr.get();
- MutableColumnPtr null_map_column = std::move(null_map_ptr);
- RETURN_IF_ERROR(_null_iterator->read_by_rowids(rowids, count,
null_map_column));
- // extract a light-weight view to decide element reads
- null_mask.reserve(count);
- for (size_t i = 0; i < count; ++i) {
- null_mask.push_back(null_map_col->get_element(null_before + i));
- }
- } else if (dst->is_nullable()) {
- // in not-null to null linked-schemachange mode,
- // actually we do not change dat data include meta in footer,
- // so may dst from changed meta which is nullable but old data is not
nullable,
- // if so, we should set null_map to all null by default
- auto null_map_ptr =
static_cast<ColumnNullable&>(*dst).get_null_map_column_ptr();
- null_map_ptr->insert_many_vals(0, count);
+
+ MutableColumnPtr null_map_ptr = ColumnVector<TYPE_BOOLEAN>::create();
Review Comment:
已改为复用 `dst` 中已经物化并过滤后的 null map,避免再通过 `_null_iterator` 重新读取同一份 meta column。
##########
be/src/storage/segment/segment_iterator.cpp:
##########
@@ -2841,8 +2855,19 @@ Status
SegmentIterator::_read_columns_by_rowids(std::vector<ColumnId>& read_colu
"SegmentIterator meet invalid column, return columns size
{}, cid {}",
_current_return_columns.size(), cid);
}
- RETURN_IF_ERROR(_column_iterators[cid]->read_by_rowids(rowids.data(),
select_size,
-
_current_return_columns[cid]));
+
+ auto* column_iter = _column_iterators[cid].get();
+ if (read_for_predicate &&
_support_lazy_read_pruned_columns.contains(cid)) {
+
column_iter->set_reading_mode(ColumnIterator::ReadingMode::PREDICATE);
+ } else {
+ column_iter->set_reading_mode(ColumnIterator::ReadingMode::NORMAL);
+ }
+ Defer defer {[column_iter]() {
Review Comment:
已新增 `ScopedColumnIteratorReadingMode`,统一封装临时 reading mode 设置和恢复,并替换多处
open-coded `Defer`。
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]