mrhhsg commented on code in PR #59263:
URL: https://github.com/apache/doris/pull/59263#discussion_r3319069126


##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -855,6 +855,25 @@ Status 
ColumnReader::new_struct_iterator(ColumnIteratorUPtr* iterator,
     return Status::OK();
 }
 
+void ColumnIterator::_convert_to_place_holder_column(MutableColumnPtr& dst, 
size_t count) {
+    if (_reading_flag != ReadingFlag::SKIP_READING && _reading_mode == 
ReadingMode::PREDICATE) {
+        _has_place_holder_column = true;
+    }
+
+    if (_reading_mode != ReadingMode::LAZY) {
+        dst->insert_many_defaults(count);
+    }
+}
+
+void ColumnIterator::_recovery_from_place_holder_column(MutableColumnPtr& dst) 
{
+    if (_reading_mode == ReadingMode::LAZY) {

Review Comment:
   已改成单个 `if (_reading_mode == ReadingMode::LAZY && _has_place_holder_column)` 判断,避免嵌套 if。



##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -882,6 +901,37 @@ Result<TColumnAccessPaths> 
ColumnIterator::_get_sub_access_paths(
     return sub_access_paths;
 }
 
+Result<TColumnAccessPaths> ColumnIterator::_process_sub_access_paths(
+        const TColumnAccessPaths& access_paths, const bool is_predicate) {
+    TColumnAccessPaths sub_access_paths = access_paths;

Review Comment:
   已把 `_get_sub_access_paths` 改成按值传参,直接在参数副本上处理,避免函数内再额外 copy。



##########
be/src/storage/segment/column_reader.h:
##########
@@ -424,18 +426,82 @@ class ColumnIterator {
     bool read_offset_only() const { return _read_mode == 
ReadMode::OFFSET_ONLY; }
     bool read_null_map_only() const { return _read_mode == 
ReadMode::NULL_MAP_ONLY; }
 
+    enum class ReadingMode : int {
+        NORMAL, // default mode
+        PREDICATE,
+        LAZY
+    };
+
+    virtual void set_reading_mode(ReadingMode mode) {
+        _reading_mode = mode;
+        if (mode == ReadingMode::PREDICATE) {
+            _has_place_holder_column = false;
+        }
+    }
+
+    virtual bool need_to_read() const {
+        switch (_reading_mode) {
+        case ReadingMode::NORMAL:
+            return _reading_flag != ReadingFlag::SKIP_READING;
+        case ReadingMode::PREDICATE:
+            return _reading_flag == ReadingFlag::READING_FOR_PREDICATE;
+        case ReadingMode::LAZY:
+            return _reading_flag == ReadingFlag::NEED_TO_READ;
+        default:
+            return false;
+        }
+    }
+
+    // Whether need to read meta columns, such as null map column, offset 
column.
+    bool need_to_read_meta_columns() const {
+        if (_reading_flag == ReadingFlag::SKIP_READING) {
+            return false;
+        }
+        switch (_reading_mode) {
+        case ReadingMode::NORMAL:
+        case ReadingMode::PREDICATE:
+            return true;
+        case ReadingMode::LAZY:
+            return _reading_flag != ReadingFlag::READING_FOR_PREDICATE;
+        }
+        return false;
+    }
+
+    virtual void finalize_lazy_mode(MutableColumnPtr& dst) {
+        _recovery_from_place_holder_column(dst);
+    }
+
+    virtual void set_reading_flag_recursively(ReadingFlag flag) { 
set_reading_flag(flag); }

Review Comment:
   这里保留递归命名并补了注释:`set_reading_flag()` 只改当前 iterator,`set_reading_flag_recursively()` 用于一次性设置整棵子树;reading mode 目前只有递归设置的使用场景,所以没有再增加一个不使用的非递归同名接口。



##########
be/src/storage/segment/column_reader.h:
##########
@@ -424,18 +426,82 @@ class ColumnIterator {
     bool read_offset_only() const { return _read_mode == 
ReadMode::OFFSET_ONLY; }
     bool read_null_map_only() const { return _read_mode == 
ReadMode::NULL_MAP_ONLY; }
 
+    enum class ReadingMode : int {
+        NORMAL, // default mode
+        PREDICATE,
+        LAZY
+    };
+
+    virtual void set_reading_mode(ReadingMode mode) {
+        _reading_mode = mode;
+        if (mode == ReadingMode::PREDICATE) {
+            _has_place_holder_column = false;
+        }
+    }
+
+    virtual bool need_to_read() const {
+        switch (_reading_mode) {
+        case ReadingMode::NORMAL:
+            return _reading_flag != ReadingFlag::SKIP_READING;
+        case ReadingMode::PREDICATE:
+            return _reading_flag == ReadingFlag::READING_FOR_PREDICATE;
+        case ReadingMode::LAZY:
+            return _reading_flag == ReadingFlag::NEED_TO_READ;
+        default:
+            return false;
+        }
+    }
+
+    // Whether need to read meta columns, such as null map column, offset 
column.
+    bool need_to_read_meta_columns() const {
+        if (_reading_flag == ReadingFlag::SKIP_READING) {
+            return false;
+        }
+        switch (_reading_mode) {
+        case ReadingMode::NORMAL:
+        case ReadingMode::PREDICATE:
+            return true;
+        case ReadingMode::LAZY:
+            return _reading_flag != ReadingFlag::READING_FOR_PREDICATE;
+        }
+        return false;
+    }
+
+    virtual void finalize_lazy_mode(MutableColumnPtr& dst) {
+        _recovery_from_place_holder_column(dst);
+    }
+
+    virtual void set_reading_flag_recursively(ReadingFlag flag) { 
set_reading_flag(flag); }
+
+    // Whether this iterator or any nested iterator has data that must be 
materialized
+    // in lazy mode. Predicate-only and meta-only branches are read before 
filtering and
+    // must not be re-read in the lazy phase.
+    virtual bool has_lazy_read_target() const { return _reading_flag == 
ReadingFlag::NEED_TO_READ; }
+
+    bool is_pruned() const { return _pruned; }

Review Comment:
   已删除冗余的 `is_pruned()` 条件,并在代码注释里说明:`reading_flag() == READING_FOR_PREDICATE && has_lazy_read_target()` 已经隐含发生过 nested-column pruning。



##########
be/src/storage/segment/column_reader.h:
##########
@@ -424,18 +426,81 @@ class ColumnIterator {
     bool read_offset_only() const { return _read_mode == 
ReadMode::OFFSET_ONLY; }
     bool read_null_map_only() const { return _read_mode == 
ReadMode::NULL_MAP_ONLY; }
 
+    enum class ReadingMode : int {
+        NORMAL, // default mode
+        PREDICATE,
+        LAZY
+    };
+
+    virtual void set_reading_mode(ReadingMode mode) {
+        _reading_mode = mode;
+        if (mode == ReadingMode::PREDICATE) {
+            _has_place_holder_column = false;
+        }
+    }
+
+    virtual bool need_to_read() const {
+        switch (_reading_mode) {
+        case ReadingMode::NORMAL:
+            return _reading_flag != ReadingFlag::SKIP_READING;
+        case ReadingMode::PREDICATE:
+            return _reading_flag == ReadingFlag::READING_FOR_PREDICATE;
+        case ReadingMode::LAZY:
+            return _reading_flag == ReadingFlag::NEED_TO_READ;
+        default:
+            return false;
+        }
+    }
+
+    // Whether need to read meta columns, such as null map column, offset 
column.
+    bool need_to_read_meta_columns() const {
+        if (_reading_flag == ReadingFlag::SKIP_READING) {

Review Comment:
   已改为 `return ColumnIterator::need_to_read();`。这里是刻意调用基类实现,避免走 Map/Struct/Array 在 LAZY 模式下包含 child target 的 override 逻辑;meta columns 是否追加只应由当前 iterator 自身的 reading flag/mode 决定。



##########
be/src/storage/segment/column_reader.cpp:
##########
@@ -855,14 +855,34 @@ Status 
ColumnReader::new_struct_iterator(ColumnIteratorUPtr* iterator,
     return Status::OK();
 }
 
+void ColumnIterator::_convert_to_place_holder_column(MutableColumnPtr& dst, 
size_t count) {
+    if (_reading_mode == ReadingMode::LAZY) {
+        return;
+    } else if (_reading_flag != ReadingFlag::SKIP_READING &&

Review Comment:
   已将 placeholder 场景收敛为 `_reading_flag == ReadingFlag::NEED_TO_READ`,并补了注释说明:只有非 predicate、但 lazy 阶段还需要物化的列,才会在 predicate phase 插入 placeholder;`READING_FOR_PREDICATE` 正常读取,`SKIP/NORMAL` 不参与 lazy materialization。



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to