Re: [PR] [Feature-Variant](Variant Type) support variant type query and index [doris]

via GitHub Thu, 09 Nov 2023 23:57:51 -0800


github-actions[bot] commented on code in PR #26749:
URL: https://github.com/apache/doris/pull/26749#discussion_r1389037811



##########
be/src/olap/match_predicate.cpp:
##########
@@ -39,34 +44,37 @@ PredicateType MatchPredicate::type() const {
     return PredicateType::MATCH;
 }
 
-Status MatchPredicate::evaluate(const Schema& schema, InvertedIndexIterator* 
iterator,
-                                uint32_t num_rows, roaring::Roaring* bitmap) 
const {
+Status MatchPredicate::evaluate(const vectorized::NameAndTypePair& 
name_with_type,
+                                InvertedIndexIterator* iterator, uint32_t 
num_rows,
+                                roaring::Roaring* bitmap) const {
     if (iterator == nullptr) {
         return Status::OK();
     }
     if (_skip_evaluate(iterator)) {
         return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
                 "match predicate evaluate skipped.");
     }
-    auto column_desc = schema.column(_column_id);
+    auto type = name_with_type.second;
+    const std::string& name = name_with_type.first;
     roaring::Roaring roaring;
     auto inverted_index_query_type = 
_to_inverted_index_query_type(_match_type);
-
-    if (is_string_type(column_desc->type()) ||
-        (column_desc->type() == FieldType::OLAP_FIELD_TYPE_ARRAY &&
-         is_string_type(column_desc->get_sub_field(0)->type_info()->type()))) {
+    TypeDescriptor column_desc = type->get_type_as_type_descriptor();
+    if (is_string_type(column_desc.type)) {
         StringRef match_value;
         int32_t length = _value.length();
         char* buffer = const_cast<char*>(_value.c_str());
         match_value.replace(buffer, length); //is it safe?
         RETURN_IF_ERROR(iterator->read_from_inverted_index(
-                column_desc->name(), &match_value, inverted_index_query_type, 
num_rows, &roaring));
-    } else if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_ARRAY &&
-               
is_numeric_type(column_desc->get_sub_field(0)->type_info()->type())) {
-        char buf[column_desc->get_sub_field(0)->type_info()->size()];
-        RETURN_IF_ERROR(column_desc->get_sub_field(0)->from_string(buf, 
_value));
-        RETURN_IF_ERROR(iterator->read_from_inverted_index(
-                column_desc->name(), buf, inverted_index_query_type, num_rows, 
&roaring, true));
+                name, &match_value, inverted_index_query_type, num_rows, 
&roaring));
+    } else if (column_desc.type == TYPE_ARRAY &&
+               is_numeric_type(
+                       
TabletColumn::get_field_type_by_type(column_desc.children[0].type))) {
+        char buf[column_desc.children[0].len];

Review Comment:
   warning: do not declare C-style arrays, use std::array<> instead 
[modernize-avoid-c-arrays]
   ```cpp
           char buf[column_desc.children[0].len];
           ^
   ```
   



##########
be/src/olap/rowset/rowset_writer_context.h:
##########
@@ -57,12 +57,13 @@ struct RowsetWriterContext {
     RowsetId rowset_id;
     int64_t tablet_id;
     int64_t tablet_schema_hash;
-    int64_t index_id;
     int64_t partition_id;
+    int64_t index_id;

Review Comment:
   warning: use default member initializer for 'index_id' 
[modernize-use-default-member-init]
   
   be/src/olap/rowset/rowset_writer_context.h:44:
   ```diff
   -               index_id(0),
   +               ,
   ```
   
   ```suggestion
       int64_t index_id{0};
   ```
   



##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -1462,5 +1481,73 @@
     }
 }
 
+Status VariantRootColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,

Review Comment:
   warning: pointer parameter 'n' can be pointer to const 
[readability-non-const-parameter]
   
   ```suggestion
   Status VariantRootColumnIterator::next_batch(const size_t* n, 
vectorized::MutableColumnPtr& dst,
   ```
   



##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -1462,5 +1481,73 @@ void 
DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP
     }
 }
 
+Status VariantRootColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,

Review Comment:
   warning: method 'next_batch' can be made static 
[readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status VariantRootColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.h:
##########
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+
+#include "io/io_common.h"
+#include "olap/field.h"
+#include "olap/iterators.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/schema.h"
+#include "olap/tablet_schema.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_object.h"
+#include "vec/columns/subcolumn_tree.h"
+#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_object.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+struct StreamReader {
+    vectorized::MutableColumnPtr column;
+    std::unique_ptr<ColumnIterator> iterator;
+    std::shared_ptr<const vectorized::IDataType> type;
+    bool inited = false;
+    size_t rows_read = 0;
+    StreamReader() = default;
+    StreamReader(vectorized::MutableColumnPtr&& col, 
std::unique_ptr<ColumnIterator>&& it,
+                 std::shared_ptr<const vectorized::IDataType> t)
+            : column(std::move(col)), iterator(std::move(it)), type(t) {}
+};
+
+// path -> StreamReader
+using SubstreamReaderTree = vectorized::SubcolumnsTree<StreamReader>;
+
+// path -> SubcolumnReader
+struct SubcolumnReader {
+    std::unique_ptr<ColumnReader> reader;
+    std::shared_ptr<const vectorized::IDataType> file_column_type;
+};
+using SubcolumnColumnReaders = vectorized::SubcolumnsTree<SubcolumnReader>;
+
+// Reader for hierarchical data for variant, merge with root(sparse encoded 
columns)
+class HierarchicalDataReader : public ColumnIterator {
+public:
+    HierarchicalDataReader(const vectorized::PathInData& path, bool 
output_as_raw_json = false)
+            : _path(path), _output_as_raw_json(output_as_raw_json) {}
+
+    static Status create(std::unique_ptr<ColumnIterator>* reader,
+                         const SubcolumnColumnReaders::Node* target_node,
+                         const SubcolumnColumnReaders::Node* root, bool 
output_as_raw_json = false);
+
+    Status init(const ColumnIteratorOptions& opts) override;
+
+    Status seek_to_first() override;
+
+    Status seek_to_ordinal(ordinal_t ord) override;
+
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* 
has_null) override;
+
+    Status read_by_rowids(const rowid_t* rowids, const size_t count,
+                          vectorized::MutableColumnPtr& dst) override;
+
+    ordinal_t get_current_ordinal() const override;
+
+    Status add_stream(const SubcolumnColumnReaders::Node* node);
+
+    void set_root(std::unique_ptr<StreamReader>&& root) { _root_reader = 
std::move(root); }
+
+private:
+    SubstreamReaderTree _substream_reader;
+    std::unique_ptr<StreamReader> _root_reader;
+    size_t _rows_read = 0;
+    vectorized::PathInData _path;
+    bool _output_as_raw_json = false;
+
+    template <typename NodeFunction>
+    Status tranverse(NodeFunction&& node_func) {
+        for (auto& entry : _substream_reader) {
+            RETURN_IF_ERROR(node_func(*entry));
+        }
+        return Status::OK();
+    }
+    // process read
+    template <typename ReadFunction>
+    Status process_read(ReadFunction&& read_func, 
vectorized::MutableColumnPtr& dst, size_t nrows) {
+        // Read all sub columns, and merge with root column
+        vectorized::ColumnNullable* nullable_column = nullptr;
+        if (dst->is_nullable()) {
+            nullable_column = 
assert_cast<vectorized::ColumnNullable*>(dst.get());
+        }
+        auto& variant = nullable_column == nullptr ? 
assert_cast<vectorized::ColumnObject&>(*dst)
+                                                   : 
assert_cast<vectorized::ColumnObject&>(
+                                                             
nullable_column->get_nested_column());
+
+        // read data
+        // read root first if it is not read before
+        RETURN_IF_ERROR(read_func(*_root_reader, {}, _root_reader->type));
+
+        // read container columns
+        RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+            RETURN_IF_ERROR(read_func(node.data, node.path, node.data.type));
+            return Status::OK();
+        }));
+
+        // build variant as container
+        auto container = vectorized::ColumnObject::create(true, false);
+        auto& container_variant = 
assert_cast<vectorized::ColumnObject&>(*container);
+
+        // add root first
+        if (_path.get_parts().size() == 1) {
+            auto& root_var =
+                    _root_reader->column->is_nullable()
+                            ? assert_cast<vectorized::ColumnObject&>(
+                                      assert_cast<vectorized::ColumnNullable&>(
+                                              *_root_reader->column)
+                                              .get_nested_column())
+                            : 
assert_cast<vectorized::ColumnObject&>(*_root_reader->column);
+            auto column = root_var.get_root();
+            auto type = root_var.get_root_type();
+            container_variant.add_sub_column({}, std::move(column), type);
+        }
+
+        RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+            vectorized::MutableColumnPtr column = node.data.column->get_ptr();
+            bool add = container_variant.add_sub_column(node.path.pop_front(), 
std::move(column),
+                                                        node.data.type);
+            if (!add) {
+                return Status::InternalError("Duplicated {}, type {}", 
node.path.get_path(),
+                                             node.data.type->get_name());
+            }
+            return Status::OK();
+        }));
+
+        if (_output_as_raw_json) {
+            auto col_to = vectorized::ColumnString::create();
+            col_to->reserve(nrows * 2);
+            vectorized::VectorBufferWriter write_buffer(*col_to.get());
+            auto type = std::make_shared<vectorized::DataTypeObject>();
+            for (size_t i = 0; i < nrows; ++i) {
+                type->to_string(container_variant, i, write_buffer);
+                write_buffer.commit();
+            }
+            CHECK(variant.empty());
+            
variant.create_root(std::make_shared<vectorized::DataTypeString>(), 
std::move(col_to));
+        } else {
+            // TODO select v:b -> v.b / v.b.c but v.d maybe in v
+            // copy container variant to dst variant, todo avoid copy
+            variant.insert_range_from(container_variant, 0, nrows);
+        }
+
+        // variant.set_num_rows(nrows);
+        _rows_read += nrows;
+        variant.finalize();
+#ifndef NDEBUG
+        variant.check_consistency();
+#endif
+        // clear data in nodes
+        RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+            node.data.column->clear();
+            return Status::OK();
+        }));
+        container->clear();
+        if (_root_reader->column->is_nullable()) {
+            // fill nullmap
+            DCHECK(dst->is_nullable());
+            vectorized::ColumnUInt8& dst_null_map =
+                    
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
+            vectorized::ColumnUInt8& src_null_map =
+                    
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
+                            .get_null_map_column();
+            dst_null_map.insert_range_from(src_null_map, 0, 
src_null_map.size());
+            // clear nullmap and inner data
+            src_null_map.clear();
+            assert_cast<vectorized::ColumnObject&>(
+                    
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
+                            .get_nested_column())
+                    .clear_subcolumns_data();
+        } else {
+            vectorized::ColumnObject& root_column =
+                    
assert_cast<vectorized::ColumnObject&>(*_root_reader->column);
+            root_column.clear_subcolumns_data();
+        }
+        return Status::OK();
+    }
+};
+
+// Extract from root column of variant, since root column of variant
+// encodes sparse columns that are not materialized
+class ExtractReader : public ColumnIterator {
+public:
+    ExtractReader(const TabletColumn& col, std::unique_ptr<StreamReader>&& 
root_reader)
+            : _col(col), _root_reader(std::move(root_reader)) {}
+
+    Status init(const ColumnIteratorOptions& opts) override;
+
+    Status seek_to_first() override;
+
+    Status seek_to_ordinal(ordinal_t ord) override;
+
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* 
has_null) override;
+
+    Status read_by_rowids(const rowid_t* rowids, const size_t count,

Review Comment:
   warning: parameter 'count' is const-qualified in the function declaration; 
const-qualification of parameters only has an effect in function definitions 
[readability-avoid-const-params-in-decls]
   
   ```suggestion
       Status read_by_rowids(const rowid_t* rowids, size_t count,
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.h:
##########
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+
+#include "io/io_common.h"
+#include "olap/field.h"
+#include "olap/iterators.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/schema.h"
+#include "olap/tablet_schema.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_object.h"
+#include "vec/columns/subcolumn_tree.h"
+#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_object.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+struct StreamReader {
+    vectorized::MutableColumnPtr column;
+    std::unique_ptr<ColumnIterator> iterator;
+    std::shared_ptr<const vectorized::IDataType> type;
+    bool inited = false;
+    size_t rows_read = 0;
+    StreamReader() = default;
+    StreamReader(vectorized::MutableColumnPtr&& col, 
std::unique_ptr<ColumnIterator>&& it,
+                 std::shared_ptr<const vectorized::IDataType> t)
+            : column(std::move(col)), iterator(std::move(it)), type(t) {}
+};
+
+// path -> StreamReader
+using SubstreamReaderTree = vectorized::SubcolumnsTree<StreamReader>;
+
+// path -> SubcolumnReader
+struct SubcolumnReader {
+    std::unique_ptr<ColumnReader> reader;
+    std::shared_ptr<const vectorized::IDataType> file_column_type;
+};
+using SubcolumnColumnReaders = vectorized::SubcolumnsTree<SubcolumnReader>;
+
+// Reader for hierarchical data for variant, merge with root(sparse encoded 
columns)
+class HierarchicalDataReader : public ColumnIterator {
+public:
+    HierarchicalDataReader(const vectorized::PathInData& path, bool 
output_as_raw_json = false)
+            : _path(path), _output_as_raw_json(output_as_raw_json) {}
+
+    static Status create(std::unique_ptr<ColumnIterator>* reader,
+                         const SubcolumnColumnReaders::Node* target_node,
+                         const SubcolumnColumnReaders::Node* root, bool 
output_as_raw_json = false);
+
+    Status init(const ColumnIteratorOptions& opts) override;
+
+    Status seek_to_first() override;
+
+    Status seek_to_ordinal(ordinal_t ord) override;
+
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* 
has_null) override;
+
+    Status read_by_rowids(const rowid_t* rowids, const size_t count,

Review Comment:
   warning: parameter 'count' is const-qualified in the function declaration; 
const-qualification of parameters only has an effect in function definitions 
[readability-avoid-const-params-in-decls]
   
   ```suggestion
       Status read_by_rowids(const rowid_t* rowids, size_t count,
   ```
   



##########
be/src/olap/rowset/segment_v2/column_reader.h:
##########
@@ -595,6 +597,38 @@ class RowIdColumnIterator : public ColumnIterator {
     int32_t _segment_id = 0;
 };
 
+class VariantRootColumnIterator : public ColumnIterator {
+public:
+    VariantRootColumnIterator() = delete;
+
+    explicit VariantRootColumnIterator(FileColumnIterator* iter) { 
_inner_iter.reset(iter); }
+
+    ~VariantRootColumnIterator() override = default;
+
+    Status init(const ColumnIteratorOptions& opts) override { return 
_inner_iter->init(opts); }
+
+    Status seek_to_first() override { return _inner_iter->seek_to_first(); }
+
+    Status seek_to_ordinal(ordinal_t ord_idx) override {
+        return _inner_iter->seek_to_ordinal(ord_idx);
+    }
+
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {

Review Comment:
   warning: method 'next_batch' can be made static 
[readability-convert-member-functions-to-static]
   
   ```suggestion
       static Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
   ```
   



##########
be/src/olap/rowset/segment_v2/column_reader.h:
##########
@@ -595,6 +597,38 @@
     int32_t _segment_id = 0;
 };
 
+class VariantRootColumnIterator : public ColumnIterator {
+public:
+    VariantRootColumnIterator() = delete;
+
+    explicit VariantRootColumnIterator(FileColumnIterator* iter) { 
_inner_iter.reset(iter); }
+
+    ~VariantRootColumnIterator() override = default;
+
+    Status init(const ColumnIteratorOptions& opts) override { return 
_inner_iter->init(opts); }
+
+    Status seek_to_first() override { return _inner_iter->seek_to_first(); }
+
+    Status seek_to_ordinal(ordinal_t ord_idx) override {
+        return _inner_iter->seek_to_ordinal(ord_idx);
+    }
+
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
+        bool has_null;
+        return next_batch(n, dst, &has_null);
+    }
+
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* 
has_null) override;
+
+    Status read_by_rowids(const rowid_t* rowids, const size_t count,

Review Comment:
   warning: parameter 'count' is const-qualified in the function declaration; 
const-qualification of parameters only has an effect in function definitions 
[readability-avoid-const-params-in-decls]
   
   ```suggestion
       Status read_by_rowids(const rowid_t* rowids, size_t count,
   ```
   



##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -1462,5 +1481,73 @@
     }
 }
 
+Status VariantRootColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
+                                             bool* has_null) {
+    size_t size = dst->size();
+    auto& obj =
+            dst->is_nullable()
+                    ? assert_cast<vectorized::ColumnObject&>(
+                              
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+                    : assert_cast<vectorized::ColumnObject&>(*dst);
+    if (obj.is_null_root()) {
+        obj.create_root();
+    }
+    auto root_column = obj.get_root();
+    RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
+    obj.incr_num_rows(*n);
+    for (auto& entry : obj.get_subcolumns()) {
+        if (entry->data.size() != size + *n) {
+            entry->data.insertManyDefaults(*n);
+        }
+    }
+    // fill nullmap
+    if (root_column->is_nullable()) {
+        DCHECK(dst->is_nullable());
+        vectorized::ColumnUInt8& dst_null_map =
+                
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
+        vectorized::ColumnUInt8& src_null_map =
+                
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
+        dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size());
+    }
+#ifndef NDEBUG
+    obj.check_consistency();
+#endif
+    return Status::OK();
+}
+
+Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const 
size_t count,

Review Comment:
   warning: method 'read_by_rowids' can be made static 
[readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status VariantRootColumnIterator::read_by_rowids(const rowid_t* 
rowids, const size_t count,
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+
+#include "common/status.h"
+#include "io/io_common.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_object.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+Status HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,
+                                      const SubcolumnColumnReaders::Node* node,
+                                      const SubcolumnColumnReaders::Node* root,
+                                      bool output_as_raw_json) {
+    // Non-leaf nodes need to be merged with root
+    auto* stream_iter = new HierarchicalDataReader(node->path, 
output_as_raw_json);
+    std::vector<const SubcolumnColumnReaders::Node*> leaves;
+    vectorized::PathsInData leaves_paths;
+    SubcolumnColumnReaders::get_leaves_of_node(node, leaves, leaves_paths);
+    for (size_t i = 0; i < leaves_paths.size(); ++i) {
+        if (leaves_paths[i] == root->path) {
+            // use set_root to share instead
+            continue;
+        }
+        RETURN_IF_ERROR(stream_iter->add_stream(leaves[i]));
+    }
+    // Make sure the root node is in stream_cache, so that child can merge data 
with root
+    // E.g. {"a" : {"b" : {"c" : 1}}}: access the `a.b` path and merge with root 
path so that
+    // we can make sure the data is fully merged, since some columns 
may not be extracted but remain in root,
+    // like {"a" : {"b" : {"e" : 1.1}}} in jsonb format
+    ColumnIterator* it;
+    RETURN_IF_ERROR(root->data.reader->new_iterator(&it));
+    stream_iter->set_root(std::make_unique<StreamReader>(
+            root->data.file_column_type->create_column(), 
std::unique_ptr<ColumnIterator>(it),
+            root->data.file_column_type));
+    reader->reset(stream_iter);
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::init(const ColumnIteratorOptions& opts) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->init(opts));
+        node.data.inited = true;
+        return Status::OK();
+    }));
+    if (_root_reader && !_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status HierarchicalDataReader::seek_to_ordinal(ordinal_t ord) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->seek_to_ordinal(ord));
+        return Status::OK();
+    }));
+    if (_root_reader) {
+        DCHECK(_root_reader->inited);
+        RETURN_IF_ERROR(_root_reader->iterator->seek_to_ordinal(ord));
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,

Review Comment:
   warning: method 'next_batch' can be made static 
[readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status HierarchicalDataReader::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
   ```
   



##########
be/src/olap/rowset/segment_creator.cpp:
##########
@@ -39,24 +48,170 @@ SegmentFlusher::SegmentFlusher() = default;
 
 SegmentFlusher::~SegmentFlusher() = default;
 
-Status SegmentFlusher::init(const RowsetWriterContext& rowset_writer_context) {
-    _context = rowset_writer_context;
+Status SegmentFlusher::init(RowsetWriterContext& rowset_writer_context) {
+    _context = &rowset_writer_context;
     return Status::OK();
 }
 
 Status SegmentFlusher::flush_single_block(const vectorized::Block* block, 
int32_t segment_id,
-                                          int64_t* flush_size, 
TabletSchemaSPtr flush_schema) {
+                                          int64_t* flush_size) {
     if (block->rows() == 0) {
         return Status::OK();
     }
+    TabletSchemaSPtr flush_schema = nullptr;
+    // Expand variant columns
+    vectorized::Block flush_block(*block);
+    if (_context->write_type != DataWriteType::TYPE_COMPACTION &&
+        _context->tablet_schema->num_variant_columns() > 0) {
+        RETURN_IF_ERROR(_expand_variant_to_subcolumns(flush_block, 
flush_schema));
+    }
     std::unique_ptr<segment_v2::SegmentWriter> writer;
-    bool no_compression = block->bytes() <= 
config::segment_compression_threshold_kb * 1024;
+    bool no_compression = flush_block.bytes() <= 
config::segment_compression_threshold_kb * 1024;

Review Comment:
   warning: 1024 is a magic number; consider replacing it with a named constant 
[readability-magic-numbers]
   ```cpp
       bool no_compression = flush_block.bytes() <= config::segment_compression_threshold_kb * 1024;
                                                                                               ^
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+
+#include "common/status.h"
+#include "io/io_common.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_object.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+Status HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,

Review Comment:
   warning: method 'create' can be made static 
[readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status 
HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,
   ```
   



##########
be/src/olap/rowset/segment_creator.cpp:
##########
@@ -177,7 +338,7 @@
     return _writer->max_row_to_add(row_avg_size_in_bytes);
 }
 
-Status SegmentCreator::init(const RowsetWriterContext& rowset_writer_context) {
+Status SegmentCreator::init(RowsetWriterContext& rowset_writer_context) {

Review Comment:
   warning: method 'init' can be made static 
[readability-convert-member-functions-to-static]
   
   be/src/olap/rowset/segment_creator.h:155:
   ```diff
   -     Status init(RowsetWriterContext& rowset_writer_context);
   +     static Status init(RowsetWriterContext& rowset_writer_context);
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+
+#include "common/status.h"
+#include "io/io_common.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_object.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+Status HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,
+                                      const SubcolumnColumnReaders::Node* node,
+                                      const SubcolumnColumnReaders::Node* root,
+                                      bool output_as_raw_json) {
+    // None leave node need merge with root
+    auto* stream_iter = new HierarchicalDataReader(node->path, 
output_as_raw_json);
+    std::vector<const SubcolumnColumnReaders::Node*> leaves;
+    vectorized::PathsInData leaves_paths;
+    SubcolumnColumnReaders::get_leaves_of_node(node, leaves, leaves_paths);
+    for (size_t i = 0; i < leaves_paths.size(); ++i) {
+        if (leaves_paths[i] == root->path) {
+            // use set_root to share instead
+            continue;
+        }
+        RETURN_IF_ERROR(stream_iter->add_stream(leaves[i]));
+    }
+    // Make sure the root node is in strem_cache, so that child can merge data 
with root
+    // Eg. {"a" : "b" : {"c" : 1}}, access the `a.b` path and merge with root 
path so that
+    // we could make sure the data could be fully merged, since some column 
may not be extracted but remains in root
+    // like {"a" : "b" : {"e" : 1.1}} in jsonb format
+    ColumnIterator* it;
+    RETURN_IF_ERROR(root->data.reader->new_iterator(&it));
+    stream_iter->set_root(std::make_unique<StreamReader>(
+            root->data.file_column_type->create_column(), 
std::unique_ptr<ColumnIterator>(it),
+            root->data.file_column_type));
+    reader->reset(stream_iter);
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::init(const ColumnIteratorOptions& opts) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->init(opts));
+        node.data.inited = true;
+        return Status::OK();
+    }));
+    if (_root_reader && !_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status HierarchicalDataReader::seek_to_ordinal(ordinal_t ord) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->seek_to_ordinal(ord));
+        return Status::OK();
+    }));
+    if (_root_reader) {
+        DCHECK(_root_reader->inited);
+        RETURN_IF_ERROR(_root_reader->iterator->seek_to_ordinal(ord));
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
+                                          bool* has_null) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->next_batch(n, reader.column, 
has_null));
+                VLOG_DEBUG << fmt::format("{} next_batch {} rows, type={}", 
path.get_path(), *n,
+                                          type->get_name());
+                reader.rows_read += *n;
+                return Status::OK();
+            },
+            dst, *n);
+}
+
+Status HierarchicalDataReader::read_by_rowids(const rowid_t* rowids, const 
size_t count,
+                                              vectorized::MutableColumnPtr& 
dst) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->read_by_rowids(rowids, count, 
reader.column));
+                VLOG_DEBUG << fmt::format("{} read_by_rowids {} rows, 
type={}", path.get_path(),
+                                          count, type->get_name());
+                reader.rows_read += count;
+                return Status::OK();
+            },
+            dst, count);
+}
+
+Status HierarchicalDataReader::add_stream(const SubcolumnColumnReaders::Node* 
node) {

Review Comment:
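   warning: method 'add_stream' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status HierarchicalDataReader::add_stream(const SubcolumnColumnReaders::Node* node) {
   ```
   
   Note: in C++ the `static` keyword is only allowed on the in-class declaration, not on this out-of-class definition. Verify the warning before acting on it: the `add_stream` body quoted later in this thread uses `_substream_reader` and `_path`, which look like instance members.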
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+
+#include "common/status.h"
+#include "io/io_common.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_object.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {

Review Comment:
   warning: nested namespaces can be concatenated [modernize-concat-nested-namespaces]
   
   ```suggestion
   namespace doris::segment_v2 {
   ```
   
   be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:230:
   ```diff
   - } // namespace segment_v2
   - } // namespace doris
   + } // namespace doris::segment_v2
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+
+#include "common/status.h"
+#include "io/io_common.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_object.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+Status HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,
+                                      const SubcolumnColumnReaders::Node* node,
+                                      const SubcolumnColumnReaders::Node* root,
+                                      bool output_as_raw_json) {
+    // None leave node need merge with root
+    auto* stream_iter = new HierarchicalDataReader(node->path, 
output_as_raw_json);
+    std::vector<const SubcolumnColumnReaders::Node*> leaves;
+    vectorized::PathsInData leaves_paths;
+    SubcolumnColumnReaders::get_leaves_of_node(node, leaves, leaves_paths);
+    for (size_t i = 0; i < leaves_paths.size(); ++i) {
+        if (leaves_paths[i] == root->path) {
+            // use set_root to share instead
+            continue;
+        }
+        RETURN_IF_ERROR(stream_iter->add_stream(leaves[i]));
+    }
+    // Make sure the root node is in strem_cache, so that child can merge data 
with root
+    // Eg. {"a" : "b" : {"c" : 1}}, access the `a.b` path and merge with root 
path so that
+    // we could make sure the data could be fully merged, since some column 
may not be extracted but remains in root
+    // like {"a" : "b" : {"e" : 1.1}} in jsonb format
+    ColumnIterator* it;
+    RETURN_IF_ERROR(root->data.reader->new_iterator(&it));
+    stream_iter->set_root(std::make_unique<StreamReader>(
+            root->data.file_column_type->create_column(), 
std::unique_ptr<ColumnIterator>(it),
+            root->data.file_column_type));
+    reader->reset(stream_iter);
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::init(const ColumnIteratorOptions& opts) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->init(opts));
+        node.data.inited = true;
+        return Status::OK();
+    }));
+    if (_root_reader && !_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status HierarchicalDataReader::seek_to_ordinal(ordinal_t ord) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->seek_to_ordinal(ord));
+        return Status::OK();
+    }));
+    if (_root_reader) {
+        DCHECK(_root_reader->inited);
+        RETURN_IF_ERROR(_root_reader->iterator->seek_to_ordinal(ord));
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
+                                          bool* has_null) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->next_batch(n, reader.column, 
has_null));
+                VLOG_DEBUG << fmt::format("{} next_batch {} rows, type={}", 
path.get_path(), *n,
+                                          type->get_name());
+                reader.rows_read += *n;
+                return Status::OK();
+            },
+            dst, *n);
+}
+
+Status HierarchicalDataReader::read_by_rowids(const rowid_t* rowids, const 
size_t count,

Review Comment:
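   warning: method 'read_by_rowids' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status HierarchicalDataReader::read_by_rowids(const rowid_t* rowids, const size_t count,
   ```
   
   Note: in C++ the `static` keyword is only allowed on the in-class declaration (in hierarchical_data_reader.h); it must not be repeated on this out-of-class definition, so apply the change to the header rather than committing this suggestion as-is.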
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.h:
##########
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+
+#include "io/io_common.h"
+#include "olap/field.h"
+#include "olap/iterators.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/schema.h"
+#include "olap/tablet_schema.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_object.h"
+#include "vec/columns/subcolumn_tree.h"
+#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_object.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {

Review Comment:
   warning: nested namespaces can be concatenated [modernize-concat-nested-namespaces]
   
   ```suggestion
   namespace doris::segment_v2 {
   ```
   
   be/src/olap/rowset/segment_v2/hierarchical_data_reader.h:235:
   ```diff
   - } // namespace segment_v2
   - } // namespace doris
   + } // namespace doris::segment_v2
   ```
   



##########
be/src/olap/reader.cpp:
##########
@@ -259,6 +260,16 @@ Status TabletReader::_capture_rs_readers(const 
ReaderParams& read_params) {
     return Status::OK();
 }
 
+TabletColumn TabletReader::materialize_column(const TabletColumn& orig) {

Review Comment:
   warning: method 'materialize_column' can be made static 
[readability-convert-member-functions-to-static]
   
   be/src/olap/reader.h:263:
   ```diff
   -     TabletColumn materialize_column(const TabletColumn& orig);
   +     static TabletColumn materialize_column(const TabletColumn& orig);
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+
+#include "common/status.h"
+#include "io/io_common.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_object.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+Status HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,
+                                      const SubcolumnColumnReaders::Node* node,
+                                      const SubcolumnColumnReaders::Node* root,
+                                      bool output_as_raw_json) {
+    // None leave node need merge with root
+    auto* stream_iter = new HierarchicalDataReader(node->path, 
output_as_raw_json);
+    std::vector<const SubcolumnColumnReaders::Node*> leaves;
+    vectorized::PathsInData leaves_paths;
+    SubcolumnColumnReaders::get_leaves_of_node(node, leaves, leaves_paths);
+    for (size_t i = 0; i < leaves_paths.size(); ++i) {
+        if (leaves_paths[i] == root->path) {
+            // use set_root to share instead
+            continue;
+        }
+        RETURN_IF_ERROR(stream_iter->add_stream(leaves[i]));
+    }
+    // Make sure the root node is in strem_cache, so that child can merge data 
with root
+    // Eg. {"a" : "b" : {"c" : 1}}, access the `a.b` path and merge with root 
path so that
+    // we could make sure the data could be fully merged, since some column 
may not be extracted but remains in root
+    // like {"a" : "b" : {"e" : 1.1}} in jsonb format
+    ColumnIterator* it;
+    RETURN_IF_ERROR(root->data.reader->new_iterator(&it));
+    stream_iter->set_root(std::make_unique<StreamReader>(
+            root->data.file_column_type->create_column(), 
std::unique_ptr<ColumnIterator>(it),
+            root->data.file_column_type));
+    reader->reset(stream_iter);
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::init(const ColumnIteratorOptions& opts) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->init(opts));
+        node.data.inited = true;
+        return Status::OK();
+    }));
+    if (_root_reader && !_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status HierarchicalDataReader::seek_to_ordinal(ordinal_t ord) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->seek_to_ordinal(ord));
+        return Status::OK();
+    }));
+    if (_root_reader) {
+        DCHECK(_root_reader->inited);
+        RETURN_IF_ERROR(_root_reader->iterator->seek_to_ordinal(ord));
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
+                                          bool* has_null) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->next_batch(n, reader.column, 
has_null));
+                VLOG_DEBUG << fmt::format("{} next_batch {} rows, type={}", 
path.get_path(), *n,
+                                          type->get_name());
+                reader.rows_read += *n;
+                return Status::OK();
+            },
+            dst, *n);
+}
+
+Status HierarchicalDataReader::read_by_rowids(const rowid_t* rowids, const 
size_t count,
+                                              vectorized::MutableColumnPtr& 
dst) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->read_by_rowids(rowids, count, 
reader.column));
+                VLOG_DEBUG << fmt::format("{} read_by_rowids {} rows, 
type={}", path.get_path(),
+                                          count, type->get_name());
+                reader.rows_read += count;
+                return Status::OK();
+            },
+            dst, count);
+}
+
+Status HierarchicalDataReader::add_stream(const SubcolumnColumnReaders::Node* 
node) {
+    if (_substream_reader.find_leaf(node->path)) {
+        VLOG_DEBUG << "Already exist sub column " << node->path.get_path();
+        return Status::OK();
+    }
+    CHECK(node);
+    ColumnIterator* it;
+    RETURN_IF_ERROR(node->data.reader->new_iterator(&it));
+    std::unique_ptr<ColumnIterator> it_ptr;
+    it_ptr.reset(it);
+    StreamReader reader(node->data.file_column_type->create_column(), 
std::move(it_ptr),
+                        node->data.file_column_type);
+    bool added = _substream_reader.add(node->path, std::move(reader));
+    if (!added) {
+        return Status::InternalError("Failed to add node path {}", 
node->path.get_path());
+    }
+    VLOG_DEBUG << fmt::format("Add substream {} for {}", 
node->path.get_path(), _path.get_path());
+    return Status::OK();
+}
+
+ordinal_t HierarchicalDataReader::get_current_ordinal() const {
+    return (*_substream_reader.begin())->data.iterator->get_current_ordinal();
+}
+
+Status ExtractReader::init(const ColumnIteratorOptions& opts) {
+    if (!_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status ExtractReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status ExtractReader::seek_to_ordinal(ordinal_t ord) {
+    CHECK(_root_reader->inited);
+    return _root_reader->iterator->seek_to_ordinal(ord);
+}
+
+Status ExtractReader::extract_to(vectorized::MutableColumnPtr& dst, size_t 
nrows) {
+    DCHECK(_root_reader);
+    DCHECK(_root_reader->inited);
+    vectorized::ColumnNullable* nullable_column = nullptr;
+    if (dst->is_nullable()) {
+        nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get());
+    }
+    auto& variant =
+            nullable_column == nullptr
+                    ? assert_cast<vectorized::ColumnObject&>(*dst)
+                    : 
assert_cast<vectorized::ColumnObject&>(nullable_column->get_nested_column());
+    const auto& root =
+            _root_reader->column->is_nullable()
+                    ? assert_cast<vectorized::ColumnObject&>(
+                              
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
+                                      .get_nested_column())
+                    : assert_cast<const 
vectorized::ColumnObject&>(*_root_reader->column);
+    // extract root value with path, we can't modify the original root column
+    // since some other column may depend on it.
+    vectorized::MutableColumnPtr extracted_column;
+    RETURN_IF_ERROR(root.extract_root( // trim the root name, eg. v.a.b -> a.b
+            _col.path_info().pop_front(), extracted_column));
+    if (variant.empty() || variant.is_null_root()) {
+        variant.create_root(root.get_root_type(), std::move(extracted_column));
+    } else {
+        vectorized::ColumnPtr cast_column;
+        const auto& expected_type = variant.get_root_type();
+        RETURN_IF_ERROR(vectorized::schema_util::cast_column(
+                {extracted_column->get_ptr(),
+                 vectorized::make_nullable(
+                         
std::make_shared<vectorized::ColumnObject::MostCommonType>()),
+                 ""},
+                expected_type, &cast_column));
+        variant.get_root()->insert_range_from(*cast_column, 0, nrows);
+        variant.set_num_rows(variant.get_root()->size());
+    }
+    if (dst->is_nullable()) {
+        // fill nullmap
+        vectorized::ColumnUInt8& dst_null_map =
+                
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
+        vectorized::ColumnUInt8& src_null_map =
+                
assert_cast<vectorized::ColumnNullable&>(*variant.get_root()).get_null_map_column();
+        dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size());
+    }
+    _root_reader->column->clear();
+#ifndef NDEBUG
+    variant.check_consistency();
+#endif
+    return Status::OK();
+}
+
+Status ExtractReader::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, 
bool* has_null) {
+    RETURN_IF_ERROR(_root_reader->iterator->next_batch(n, 
_root_reader->column));
+    RETURN_IF_ERROR(extract_to(dst, *n));
+    return Status::OK();
+}
+
+Status ExtractReader::read_by_rowids(const rowid_t* rowids, const size_t count,

Review Comment:
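   warning: method 'read_by_rowids' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status ExtractReader::read_by_rowids(const rowid_t* rowids, const size_t count,
   ```
   
   Note: in C++ the `static` keyword is only allowed on the in-class declaration (in hierarchical_data_reader.h); it must not be repeated on this out-of-class definition, so apply the change to the header rather than committing this suggestion as-is.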
   



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -318,14 +330,54 @@ Status Segment::_load_index_impl() {
     });
 }
 
+static vectorized::DataTypePtr get_data_type_from_column_meta(
+        const segment_v2::ColumnMetaPB& column) {
+    return vectorized::DataTypeFactory::instance().create_data_type(column);
+}
+
+vectorized::DataTypePtr Segment::get_data_type_of(const Field& field, bool 
ignore_children) const {

Review Comment:
   warning: method 'get_data_type_of' can be made static 
[readability-convert-member-functions-to-static]
   
   ```suggestion
   vectorized::DataTypePtr Segment::get_data_type_of(const Field& field, bool 
ignore_children) {
   ```
   
   be/src/olap/rowset/segment_v2/segment.h:146:
   ```diff
   -     std::shared_ptr<const vectorized::IDataType> get_data_type_of(const 
Field& filed,
   -                                                                   bool 
ignore_children) const;
   +     static std::shared_ptr<const vectorized::IDataType> 
get_data_type_of(const Field& filed,
   +                                                                   bool 
ignore_children) ;
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+
+#include "common/status.h"
+#include "io/io_common.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_object.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+Status HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,
+                                      const SubcolumnColumnReaders::Node* node,
+                                      const SubcolumnColumnReaders::Node* root,
+                                      bool output_as_raw_json) {
+    // None leave node need merge with root
+    auto* stream_iter = new HierarchicalDataReader(node->path, 
output_as_raw_json);
+    std::vector<const SubcolumnColumnReaders::Node*> leaves;
+    vectorized::PathsInData leaves_paths;
+    SubcolumnColumnReaders::get_leaves_of_node(node, leaves, leaves_paths);
+    for (size_t i = 0; i < leaves_paths.size(); ++i) {
+        if (leaves_paths[i] == root->path) {
+            // use set_root to share instead
+            continue;
+        }
+        RETURN_IF_ERROR(stream_iter->add_stream(leaves[i]));
+    }
+    // Make sure the root node is in strem_cache, so that child can merge data 
with root
+    // Eg. {"a" : "b" : {"c" : 1}}, access the `a.b` path and merge with root 
path so that
+    // we could make sure the data could be fully merged, since some column 
may not be extracted but remains in root
+    // like {"a" : "b" : {"e" : 1.1}} in jsonb format
+    ColumnIterator* it;
+    RETURN_IF_ERROR(root->data.reader->new_iterator(&it));
+    stream_iter->set_root(std::make_unique<StreamReader>(
+            root->data.file_column_type->create_column(), 
std::unique_ptr<ColumnIterator>(it),
+            root->data.file_column_type));
+    reader->reset(stream_iter);
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::init(const ColumnIteratorOptions& opts) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->init(opts));
+        node.data.inited = true;
+        return Status::OK();
+    }));
+    if (_root_reader && !_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status HierarchicalDataReader::seek_to_ordinal(ordinal_t ord) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->seek_to_ordinal(ord));
+        return Status::OK();
+    }));
+    if (_root_reader) {
+        DCHECK(_root_reader->inited);
+        RETURN_IF_ERROR(_root_reader->iterator->seek_to_ordinal(ord));
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
+                                          bool* has_null) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->next_batch(n, reader.column, 
has_null));
+                VLOG_DEBUG << fmt::format("{} next_batch {} rows, type={}", 
path.get_path(), *n,
+                                          type->get_name());
+                reader.rows_read += *n;
+                return Status::OK();
+            },
+            dst, *n);
+}
+
+Status HierarchicalDataReader::read_by_rowids(const rowid_t* rowids, const 
size_t count,
+                                              vectorized::MutableColumnPtr& 
dst) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->read_by_rowids(rowids, count, 
reader.column));
+                VLOG_DEBUG << fmt::format("{} read_by_rowids {} rows, 
type={}", path.get_path(),
+                                          count, type->get_name());
+                reader.rows_read += count;
+                return Status::OK();
+            },
+            dst, count);
+}
+
+Status HierarchicalDataReader::add_stream(const SubcolumnColumnReaders::Node* 
node) {
+    if (_substream_reader.find_leaf(node->path)) {
+        VLOG_DEBUG << "Already exist sub column " << node->path.get_path();
+        return Status::OK();
+    }
+    CHECK(node);
+    ColumnIterator* it;
+    RETURN_IF_ERROR(node->data.reader->new_iterator(&it));
+    std::unique_ptr<ColumnIterator> it_ptr;
+    it_ptr.reset(it);
+    StreamReader reader(node->data.file_column_type->create_column(), 
std::move(it_ptr),
+                        node->data.file_column_type);
+    bool added = _substream_reader.add(node->path, std::move(reader));
+    if (!added) {
+        return Status::InternalError("Failed to add node path {}", 
node->path.get_path());
+    }
+    VLOG_DEBUG << fmt::format("Add substream {} for {}", 
node->path.get_path(), _path.get_path());
+    return Status::OK();
+}
+
+ordinal_t HierarchicalDataReader::get_current_ordinal() const {
+    return (*_substream_reader.begin())->data.iterator->get_current_ordinal();
+}
+
+Status ExtractReader::init(const ColumnIteratorOptions& opts) {
+    if (!_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status ExtractReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status ExtractReader::seek_to_ordinal(ordinal_t ord) {
+    CHECK(_root_reader->inited);
+    return _root_reader->iterator->seek_to_ordinal(ord);
+}
+
+Status ExtractReader::extract_to(vectorized::MutableColumnPtr& dst, size_t 
nrows) {
+    DCHECK(_root_reader);
+    DCHECK(_root_reader->inited);
+    vectorized::ColumnNullable* nullable_column = nullptr;
+    if (dst->is_nullable()) {
+        nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get());
+    }
+    auto& variant =
+            nullable_column == nullptr
+                    ? assert_cast<vectorized::ColumnObject&>(*dst)
+                    : 
assert_cast<vectorized::ColumnObject&>(nullable_column->get_nested_column());
+    const auto& root =
+            _root_reader->column->is_nullable()
+                    ? assert_cast<vectorized::ColumnObject&>(
+                              
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
+                                      .get_nested_column())
+                    : assert_cast<const 
vectorized::ColumnObject&>(*_root_reader->column);
+    // extract root value with path, we can't modify the original root column
+    // since some other column may depend on it.
+    vectorized::MutableColumnPtr extracted_column;
+    RETURN_IF_ERROR(root.extract_root( // trim the root name, eg. v.a.b -> a.b
+            _col.path_info().pop_front(), extracted_column));
+    if (variant.empty() || variant.is_null_root()) {
+        variant.create_root(root.get_root_type(), std::move(extracted_column));
+    } else {
+        vectorized::ColumnPtr cast_column;
+        const auto& expected_type = variant.get_root_type();
+        RETURN_IF_ERROR(vectorized::schema_util::cast_column(
+                {extracted_column->get_ptr(),
+                 vectorized::make_nullable(
+                         
std::make_shared<vectorized::ColumnObject::MostCommonType>()),
+                 ""},
+                expected_type, &cast_column));
+        variant.get_root()->insert_range_from(*cast_column, 0, nrows);
+        variant.set_num_rows(variant.get_root()->size());
+    }
+    if (dst->is_nullable()) {
+        // fill nullmap
+        vectorized::ColumnUInt8& dst_null_map =
+                
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
+        vectorized::ColumnUInt8& src_null_map =
+                
assert_cast<vectorized::ColumnNullable&>(*variant.get_root()).get_null_map_column();
+        dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size());
+    }
+    _root_reader->column->clear();
+#ifndef NDEBUG
+    variant.check_consistency();
+#endif
+    return Status::OK();
+}
+
+Status ExtractReader::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, 
bool* has_null) {

Review Comment:
   warning: method 'next_batch' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status ExtractReader::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) {
   ```
   



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -341,6 +393,114 @@
                                              _file_reader, &reader));
         _column_readers.emplace(column.unique_id(), std::move(reader));
     }
+
+    // init by column path
+    for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); 
++ordinal) {
+        auto& column = _tablet_schema->column(ordinal);
+        auto iter = column_path_to_footer_ordinal.find(column.path_info());
+        if (iter == column_path_to_footer_ordinal.end()) {
+            continue;
+        }
+        ColumnReaderOptions opts;
+        opts.kept_in_memory = _tablet_schema->is_in_memory();
+        std::unique_ptr<ColumnReader> reader;
+        RETURN_IF_ERROR(ColumnReader::create(opts, 
footer.columns(iter->second), footer.num_rows(),
+                                             _file_reader, &reader));
+        _sub_column_tree.add(
+                iter->first,
+                SubcolumnReader {std::move(reader),
+                                 
get_data_type_from_column_meta(footer.columns(iter->second))});
+    }
+    return Status::OK();
+}
+
+static Status new_default_iterator(const TabletColumn& tablet_column,
+                                   std::unique_ptr<ColumnIterator>* iter) {
+    if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) {
+        return Status::InternalError("invalid nonexistent column without 
default value.");
+    }
+    auto type_info = get_type_info(&tablet_column);
+    std::unique_ptr<DefaultValueColumnIterator> default_value_iter(new 
DefaultValueColumnIterator(
+            tablet_column.has_default_value(), tablet_column.default_value(),
+            tablet_column.is_nullable(), std::move(type_info), 
tablet_column.precision(),
+            tablet_column.frac()));
+    ColumnIteratorOptions iter_opts;
+
+    RETURN_IF_ERROR(default_value_iter->init(iter_opts));
+    *iter = std::move(default_value_iter);
+    return Status::OK();
+}
+
+Status Segment::new_iterator_with_path(const TabletColumn& tablet_column,

Review Comment:
   warning: method 'new_iterator_with_path' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status Segment::new_iterator_with_path(const TabletColumn& tablet_column,
   ```
   



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -483,5 +659,25 @@
     return Status::OK();
 }
 
+bool Segment::same_with_storage_type(int32_t cid, const Schema& schema,

Review Comment:
   warning: method 'same_with_storage_type' can be made static [readability-convert-member-functions-to-static]
   
   be/src/olap/rowset/segment_v2/segment.cpp:662:
   ```diff
   -                                      bool ignore_children) const {
   +                                      bool ignore_children) {
   ```
   
   be/src/olap/rowset/segment_v2/segment.h:150:
   ```diff
   -     bool same_with_storage_type(int32_t cid, const Schema& schema, bool ignore_children) const;
   +     static bool same_with_storage_type(int32_t cid, const Schema& schema, bool ignore_children) ;
   ```
   



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -352,36 +512,53 @@
 // but in the old schema column b's cid == 2
 // but they are not the same column
 Status Segment::new_column_iterator(const TabletColumn& tablet_column,
-                                    std::unique_ptr<ColumnIterator>* iter) {
+                                    std::unique_ptr<ColumnIterator>* iter,
+                                    StorageReadOptions* opt) {
+    // init column iterator by path info
+    if (!tablet_column.path_info().empty() || tablet_column.is_variant_type()) 
{
+        return new_iterator_with_path(tablet_column, iter, opt);
+    }
+    // init default iterator
     if (_column_readers.count(tablet_column.unique_id()) < 1) {
-        if (!tablet_column.has_default_value() && 
!tablet_column.is_nullable()) {
-            return Status::InternalError("invalid nonexistent column without 
default value.");
-        }
-        auto type_info = get_type_info(&tablet_column);
-        std::unique_ptr<DefaultValueColumnIterator> default_value_iter(
-                new 
DefaultValueColumnIterator(tablet_column.has_default_value(),
-                                               tablet_column.default_value(),
-                                               tablet_column.is_nullable(), 
std::move(type_info),
-                                               tablet_column.precision(), 
tablet_column.frac()));
-        ColumnIteratorOptions iter_opts;
-
-        RETURN_IF_ERROR(default_value_iter->init(iter_opts));
-        *iter = std::move(default_value_iter);
+        RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
         return Status::OK();
     }
+    // init iterator by unique id
     ColumnIterator* it;
     
RETURN_IF_ERROR(_column_readers.at(tablet_column.unique_id())->new_iterator(&it));
     iter->reset(it);
     return Status::OK();
 }
 
+Status Segment::new_column_iterator(int32_t unique_id, 
std::unique_ptr<ColumnIterator>* iter) {
+    ColumnIterator* it;
+    RETURN_IF_ERROR(_column_readers.at(unique_id)->new_iterator(&it));
+    iter->reset(it);
+    return Status::OK();
+}
+
+ColumnReader* Segment::_get_column_reader(const TabletColumn& col) {

Review Comment:
   warning: method '_get_column_reader' can be made static [readability-convert-member-functions-to-static]
   
   be/src/olap/rowset/segment_v2/segment.h:180:
   ```diff
   -     ColumnReader* _get_column_reader(const TabletColumn& col);
   +     static ColumnReader* _get_column_reader(const TabletColumn& col);
   ```
   



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -352,36 +512,53 @@
 // but in the old schema column b's cid == 2
 // but they are not the same column
 Status Segment::new_column_iterator(const TabletColumn& tablet_column,
-                                    std::unique_ptr<ColumnIterator>* iter) {
+                                    std::unique_ptr<ColumnIterator>* iter,
+                                    StorageReadOptions* opt) {
+    // init column iterator by path info
+    if (!tablet_column.path_info().empty() || tablet_column.is_variant_type()) 
{
+        return new_iterator_with_path(tablet_column, iter, opt);
+    }
+    // init default iterator
     if (_column_readers.count(tablet_column.unique_id()) < 1) {
-        if (!tablet_column.has_default_value() && 
!tablet_column.is_nullable()) {
-            return Status::InternalError("invalid nonexistent column without 
default value.");
-        }
-        auto type_info = get_type_info(&tablet_column);
-        std::unique_ptr<DefaultValueColumnIterator> default_value_iter(
-                new 
DefaultValueColumnIterator(tablet_column.has_default_value(),
-                                               tablet_column.default_value(),
-                                               tablet_column.is_nullable(), 
std::move(type_info),
-                                               tablet_column.precision(), 
tablet_column.frac()));
-        ColumnIteratorOptions iter_opts;
-
-        RETURN_IF_ERROR(default_value_iter->init(iter_opts));
-        *iter = std::move(default_value_iter);
+        RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
         return Status::OK();
     }
+    // init iterator by unique id
     ColumnIterator* it;
     
RETURN_IF_ERROR(_column_readers.at(tablet_column.unique_id())->new_iterator(&it));
     iter->reset(it);
     return Status::OK();
 }
 
+Status Segment::new_column_iterator(int32_t unique_id, 
std::unique_ptr<ColumnIterator>* iter) {

Review Comment:
   warning: method 'new_column_iterator' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status Segment::new_column_iterator(int32_t unique_id, std::unique_ptr<ColumnIterator>* iter) {
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Re: [PR] [Feature-Variant](Variant Type) support variant type query and index [doris]

Reply via email to