github-actions[bot] commented on code in PR #16335:
URL: https://github.com/apache/doris/pull/16335#discussion_r1093159661


##########
be/src/exec/base_scanner.cpp:
##########
@@ -250,6 +265,28 @@ Status 
BaseScanner::_materialize_dest_block(vectorized::Block* dest_block) {
                 std::move(column_ptr), slot_desc->get_data_type_ptr(), 
slot_desc->col_name()));
     }
 
+    // handle dynamic generated columns
+    if (!_full_base_schema_view->empty()) {
+        assert(_is_dynamic_schema);
+        for (size_t x = dest_block->columns(); x < _src_block.columns(); ++x) {
+            auto& column_type_name = _src_block.get_by_position(x);
+            const TColumn& tcolumn =
+                    
_full_base_schema_view->column_name_to_column[column_type_name.name];
+            auto original_type = 
vectorized::DataTypeFactory::instance().create_data_type(tcolumn);
+            // type conflict free path, always cast to original type
+            if (!column_type_name.type->equals(*original_type)) {
+                vectorized::ColumnPtr column_ptr;
+                
RETURN_IF_ERROR(vectorized::schema_util::cast_column(column_type_name,
+                                                                     
original_type, &column_ptr));
+                column_type_name.column = column_ptr;
+                column_type_name.type = original_type;
+            }
+            
dest_block->insert(vectorized::ColumnWithTypeAndName(std::move(column_type_name.column),

Review Comment:
   warning: passing result of std::move() as a const reference argument; no 
move will actually happen [performance-move-const-arg]
   
   ```suggestion
               
dest_block->insert(vectorized::ColumnWithTypeAndName(column_type_name.column,
   ```
   



##########
be/src/exec/base_scanner.cpp:
##########
@@ -250,6 +265,28 @@
                 std::move(column_ptr), slot_desc->get_data_type_ptr(), 
slot_desc->col_name()));
     }
 
+    // handle dynamic generated columns
+    if (!_full_base_schema_view->empty()) {
+        assert(_is_dynamic_schema);
+        for (size_t x = dest_block->columns(); x < _src_block.columns(); ++x) {
+            auto& column_type_name = _src_block.get_by_position(x);
+            const TColumn& tcolumn =
+                    
_full_base_schema_view->column_name_to_column[column_type_name.name];
+            auto original_type = 
vectorized::DataTypeFactory::instance().create_data_type(tcolumn);
+            // type conflict free path, always cast to original type
+            if (!column_type_name.type->equals(*original_type)) {
+                vectorized::ColumnPtr column_ptr;
+                
RETURN_IF_ERROR(vectorized::schema_util::cast_column(column_type_name,
+                                                                     
original_type, &column_ptr));
+                column_type_name.column = column_ptr;
+                column_type_name.type = original_type;
+            }
+            
dest_block->insert(vectorized::ColumnWithTypeAndName(std::move(column_type_name.column),
+                                                                 
std::move(column_type_name.type),

Review Comment:
   warning: passing result of std::move() as a const reference argument; no 
move will actually happen [performance-move-const-arg]
   
   ```suggestion
                                                                    
column_type_name.type,
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
               if (exists) {
                   return nullptr;
               }
   ```
   



##########
be/src/vec/columns/columns_common.cpp:
##########
@@ -279,4 +279,19 @@ INSTANTIATE(Float64, ColumnArray::Offset64)
 
 #undef INSTANTIATE
 
+namespace detail {
+template <typename T>
+const PaddedPODArray<T>* get_indexes_data(const IColumn& indexes) {
+    auto* column = typeid_cast<const ColumnVector<T>*>(&indexes);
+    if (column) return &column->get_data();

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
       if (column) {
           return &column->get_data();
       }
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -0,0 +1,885 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnObject.cpp
+// and modified by Doris
+
+#include <parallel_hashmap/phmap.h>
+#include <vec/columns/column_array.h>
+#include <vec/columns/column_nullable.h>
+#include <vec/columns/column_object.h>
+#include <vec/columns/columns_number.h>
+#include <vec/common/field_visitors.h>
+#include <vec/common/hash_table/hash_set.h>
+#include <vec/common/pod_array_fwd.h>
+#include <vec/common/schema_util.h>
+#include <vec/core/field.h>
+#include <vec/data_types/convert_field_to_type.h>
+#include <vec/data_types/data_type_array.h>
+#include <vec/data_types/data_type_nothing.h>
+#include <vec/data_types/get_least_supertype.h>
+
+#include <vec/data_types/data_type_factory.hpp>
+
+namespace doris::vectorized {
+namespace {
+
+DataTypePtr create_array_of_type(DataTypePtr type, size_t num_dimensions) {
+    for (size_t i = 0; i < num_dimensions; ++i)
+        type = std::make_shared<DataTypeArray>(std::move(type));

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
       for (size_t i = 0; i < num_dimensions; ++i) {
           type = std::make_shared<DataTypeArray>(std::move(type));
       }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (parts.empty()) {
               return false;
           }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;
+
+        if (!root) root = std::make_shared<Node>(Node::TUPLE);
+
+        Node* current_node = root.get();
+        for (size_t i = 0; i < parts.size() - 1; ++i) {
+            assert(current_node->kind != Node::SCALAR);
+
+            auto it = current_node->children.find(
+                    StringRef {parts[i].key.data(), parts[i].key.size()});
+            if (it != current_node->children.end()) {
+                current_node = it->get_second().get();
+                node_creator(current_node->kind, true);
+
+                if (current_node->is_nested() != parts[i].is_nested) return 
false;
+            } else {
+                auto next_kind = parts[i].is_nested ? Node::NESTED : 
Node::TUPLE;
+                auto next_node = node_creator(next_kind, false);
+                current_node->add_child(String(parts[i].key), next_node);
+                current_node = next_node.get();
+            }
+        }
+
+        auto it = current_node->children.find(
+                StringRef {parts.back().key.data(), parts.back().key.size()});
+        if (it != current_node->children.end()) return false;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (it != current_node->children.end()) {
               return false;
           }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;
+
+        if (!root) root = std::make_shared<Node>(Node::TUPLE);
+
+        Node* current_node = root.get();
+        for (size_t i = 0; i < parts.size() - 1; ++i) {
+            assert(current_node->kind != Node::SCALAR);
+
+            auto it = current_node->children.find(
+                    StringRef {parts[i].key.data(), parts[i].key.size()});
+            if (it != current_node->children.end()) {
+                current_node = it->get_second().get();
+                node_creator(current_node->kind, true);
+
+                if (current_node->is_nested() != parts[i].is_nested) return 
false;
+            } else {
+                auto next_kind = parts[i].is_nested ? Node::NESTED : 
Node::TUPLE;
+                auto next_node = node_creator(next_kind, false);
+                current_node->add_child(String(parts[i].key), next_node);
+                current_node = next_node.get();
+            }
+        }
+
+        auto it = current_node->children.find(
+                StringRef {parts.back().key.data(), parts.back().key.size()});
+        if (it != current_node->children.end()) return false;
+
+        auto next_node = node_creator(Node::SCALAR, false);
+        current_node->add_child(String(parts.back().key), next_node);
+        leaves.push_back(std::move(next_node));
+
+        return true;
+    }
+
+    /// Find node that matches the path the best.
+    const Node* find_best_match(const PathInData& path) const { return 
find_impl(path, false); }
+
+    /// Find node that matches the path exactly.
+    const Node* find_exact(const PathInData& path) const { return 
find_impl(path, true); }
+
+    /// Find leaf by path.
+    const Node* find_leaf(const PathInData& path) const {
+        const auto* candidate = find_exact(path);
+        if (!candidate || !candidate->is_scalar()) return nullptr;
+        return candidate;
+    }
+
+    using NodePredicate = std::function<bool(const Node&)>;
+
+    /// Finds leaf that satisfies the predicate.
+    const Node* find_leaf(const NodePredicate& predicate) {
+        return find_leaf(root.get(), predicate);
+    }
+
+    static const Node* find_leaf(const Node* node, const NodePredicate& 
predicate) {
+        if (!node) return nullptr;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (!node) {
               return nullptr;
           }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;
+
+        if (!root) root = std::make_shared<Node>(Node::TUPLE);
+
+        Node* current_node = root.get();
+        for (size_t i = 0; i < parts.size() - 1; ++i) {
+            assert(current_node->kind != Node::SCALAR);
+
+            auto it = current_node->children.find(
+                    StringRef {parts[i].key.data(), parts[i].key.size()});
+            if (it != current_node->children.end()) {
+                current_node = it->get_second().get();
+                node_creator(current_node->kind, true);
+
+                if (current_node->is_nested() != parts[i].is_nested) return 
false;
+            } else {
+                auto next_kind = parts[i].is_nested ? Node::NESTED : 
Node::TUPLE;
+                auto next_node = node_creator(next_kind, false);
+                current_node->add_child(String(parts[i].key), next_node);
+                current_node = next_node.get();
+            }
+        }
+
+        auto it = current_node->children.find(
+                StringRef {parts.back().key.data(), parts.back().key.size()});
+        if (it != current_node->children.end()) return false;
+
+        auto next_node = node_creator(Node::SCALAR, false);
+        current_node->add_child(String(parts.back().key), next_node);
+        leaves.push_back(std::move(next_node));
+
+        return true;
+    }
+
+    /// Find node that matches the path the best.
+    const Node* find_best_match(const PathInData& path) const { return 
find_impl(path, false); }
+
+    /// Find node that matches the path exactly.
+    const Node* find_exact(const PathInData& path) const { return 
find_impl(path, true); }
+
+    /// Find leaf by path.
+    const Node* find_leaf(const PathInData& path) const {
+        const auto* candidate = find_exact(path);
+        if (!candidate || !candidate->is_scalar()) return nullptr;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (!candidate || !candidate->is_scalar()) {
               return nullptr;
           }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;
+
+        if (!root) root = std::make_shared<Node>(Node::TUPLE);
+
+        Node* current_node = root.get();
+        for (size_t i = 0; i < parts.size() - 1; ++i) {
+            assert(current_node->kind != Node::SCALAR);
+
+            auto it = current_node->children.find(
+                    StringRef {parts[i].key.data(), parts[i].key.size()});
+            if (it != current_node->children.end()) {
+                current_node = it->get_second().get();
+                node_creator(current_node->kind, true);
+
+                if (current_node->is_nested() != parts[i].is_nested) return 
false;
+            } else {
+                auto next_kind = parts[i].is_nested ? Node::NESTED : 
Node::TUPLE;
+                auto next_node = node_creator(next_kind, false);
+                current_node->add_child(String(parts[i].key), next_node);
+                current_node = next_node.get();
+            }
+        }
+
+        auto it = current_node->children.find(
+                StringRef {parts.back().key.data(), parts.back().key.size()});
+        if (it != current_node->children.end()) return false;
+
+        auto next_node = node_creator(Node::SCALAR, false);
+        current_node->add_child(String(parts.back().key), next_node);
+        leaves.push_back(std::move(next_node));
+
+        return true;
+    }
+
+    /// Find node that matches the path the best.
+    const Node* find_best_match(const PathInData& path) const { return 
find_impl(path, false); }
+
+    /// Find node that matches the path exactly.
+    const Node* find_exact(const PathInData& path) const { return 
find_impl(path, true); }
+
+    /// Find leaf by path.
+    const Node* find_leaf(const PathInData& path) const {
+        const auto* candidate = find_exact(path);
+        if (!candidate || !candidate->is_scalar()) return nullptr;
+        return candidate;
+    }
+
+    using NodePredicate = std::function<bool(const Node&)>;
+
+    /// Finds leaf that satisfies the predicate.
+    const Node* find_leaf(const NodePredicate& predicate) {
+        return find_leaf(root.get(), predicate);
+    }
+
+    static const Node* find_leaf(const Node* node, const NodePredicate& 
predicate) {
+        if (!node) return nullptr;
+
+        if (node->is_scalar()) return predicate(*node) ? node : nullptr;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (node->is_scalar()) {
               return predicate(*node) ? node : nullptr;
           }
   ```
   



##########
be/src/vec/common/schema_util.cpp:
##########
@@ -0,0 +1,480 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <vec/columns/column_array.h>
+#include <vec/columns/column_object.h>
+#include <vec/common/schema_util.h>
+#include <vec/core/field.h>
+#include <vec/data_types/data_type_array.h>
+#include <vec/data_types/data_type_object.h>
+#include <vec/functions/simple_function_factory.h>
+#include <vec/json/parse2column.h>
+
+#include <vec/data_types/data_type_factory.hpp>
+
+#include "gen_cpp/FrontendService.h"
+#include "gen_cpp/HeartbeatService_types.h"
+#include "olap/rowset/rowset_writer_context.h"
+#include "runtime/client_cache.h"
+#include "runtime/exec_env.h"
+#include "util/thrift_rpc_helper.h"
+
+namespace doris::vectorized::schema_util {
+
+size_t get_number_of_dimensions(const IDataType& type) {
+    if (const auto* type_array = typeid_cast<const DataTypeArray*>(&type))
+        return type_array->get_number_of_dimensions();
+    return 0;
+}
+size_t get_number_of_dimensions(const IColumn& column) {
+    if (const auto* column_array = check_and_get_column<ColumnArray>(column))
+        return column_array->get_number_of_dimensions();
+    return 0;
+}
+
+DataTypePtr get_base_type_of_array(const DataTypePtr& type) {
+    /// Get raw pointers to avoid extra copying of type pointers.
+    const DataTypeArray* last_array = nullptr;
+    const auto* current_type = type.get();
+    while (const auto* type_array = typeid_cast<const 
DataTypeArray*>(current_type)) {
+        current_type = type_array->get_nested_type().get();
+        last_array = type_array;
+    }
+    return last_array ? last_array->get_nested_type() : type;
+}
+
+Array create_empty_array_field(size_t num_dimensions) {
+    DCHECK(num_dimensions > 0);
+    Array array;
+    Array* current_array = &array;
+    for (size_t i = 1; i < num_dimensions; ++i) {
+        current_array->push_back(Array());
+        current_array = &current_array->back().get<Array&>();
+    }
+    return array;
+}
+
+FieldType get_field_type(const IDataType* data_type) {
+    switch (data_type->get_type_id()) {
+    case TypeIndex::UInt8:
+        return FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT;
+    case TypeIndex::UInt16:
+        return FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT;
+    case TypeIndex::UInt32:
+        return FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT;
+    case TypeIndex::UInt64:
+        return FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT;
+    case TypeIndex::Int8:
+        return FieldType::OLAP_FIELD_TYPE_TINYINT;
+    case TypeIndex::Int16:
+        return FieldType::OLAP_FIELD_TYPE_SMALLINT;
+    case TypeIndex::Int32:
+        return FieldType::OLAP_FIELD_TYPE_INT;
+    case TypeIndex::Int64:
+        return FieldType::OLAP_FIELD_TYPE_BIGINT;
+    case TypeIndex::Float32:
+        return FieldType::OLAP_FIELD_TYPE_FLOAT;
+    case TypeIndex::Float64:
+        return FieldType::OLAP_FIELD_TYPE_DOUBLE;
+    case TypeIndex::Decimal32:
+        return FieldType::OLAP_FIELD_TYPE_DECIMAL;
+    case TypeIndex::Array:
+        return FieldType::OLAP_FIELD_TYPE_ARRAY;
+    case TypeIndex::String:
+        return FieldType::OLAP_FIELD_TYPE_STRING;
+    case TypeIndex::Date:
+        return FieldType::OLAP_FIELD_TYPE_DATE;
+    case TypeIndex::DateTime:
+        return FieldType::OLAP_FIELD_TYPE_DATETIME;
+    case TypeIndex::Tuple:
+        return FieldType::OLAP_FIELD_TYPE_STRUCT;
+    // TODO add more types
+    default:
+        LOG(FATAL) << "unknow type";
+        return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
+    }
+}
+
+Status parse_object_column(ColumnObject& dest, const IColumn& src, bool 
need_finalize,
+                           const int* row_begin, const int* row_end) {
+    assert(src.is_column_string());
+    const ColumnString* parsing_column {nullptr};
+    if (!src.is_nullable()) {
+        parsing_column = reinterpret_cast<const 
ColumnString*>(src.get_ptr().get());
+    } else {
+        auto nullable_column = reinterpret_cast<const 
ColumnNullable*>(src.get_ptr().get());
+        parsing_column = reinterpret_cast<const ColumnString*>(
+                nullable_column->get_nested_column().get_ptr().get());
+    }
+    std::vector<StringRef> jsons;
+    if (row_begin != nullptr) {
+        assert(row_end);
+        for (auto x = row_begin; x != row_end; ++x) {
+            StringRef ref = parsing_column->get_data_at(*x);
+            jsons.push_back(ref);
+        }
+    } else {
+        for (size_t i = 0; i < parsing_column->size(); ++i) {
+            StringRef ref = parsing_column->get_data_at(i);
+            jsons.push_back(ref);
+        }
+    }
+    // batch parse
+    RETURN_IF_ERROR(parse_json_to_variant(dest, jsons));
+
+    if (need_finalize) {
+        dest.finalize();
+    }
+    return Status::OK();
+}
+
+Status parse_object_column(Block& block, size_t position) {
+    // parse variant column and rewrite column
+    auto col = block.get_by_position(position).column;
+    const std::string& col_name = block.get_by_position(position).name;
+    if (!col->is_column_string()) {
+        return Status::InvalidArgument("only ColumnString can be parsed to 
ColumnObject");
+    }
+    vectorized::DataTypePtr type(
+            std::make_shared<vectorized::DataTypeObject>("", 
col->is_nullable()));
+    auto column_object = type->create_column();
+    RETURN_IF_ERROR(
+            
parse_object_column(assert_cast<ColumnObject&>(column_object->assume_mutable_ref()),
+                                *col, true /*need finalize*/, nullptr, 
nullptr));
+    // replace by object
+    block.safe_get_by_position(position).column = column_object->get_ptr();
+    block.safe_get_by_position(position).type = type;
+    block.safe_get_by_position(position).name = col_name;
+    return Status::OK();
+}
+
+void flatten_object(Block& block, size_t pos, bool replace_if_duplicated) {
+    auto column_object_ptr =
+            
assert_cast<ColumnObject*>(block.get_by_position(pos).column->assume_mutable().get());
+    if (column_object_ptr->empty()) {
+        block.erase(pos);
+        return;
+    }
+    size_t num_rows = column_object_ptr->size();
+    assert(block.rows() <= num_rows);
+    assert(column_object_ptr->is_finalized());
+    Columns subcolumns;
+    DataTypes types;
+    Names names;
+    for (auto& subcolumn : column_object_ptr->get_subcolumns()) {
+        subcolumns.push_back(subcolumn->data.get_finalized_column().get_ptr());
+        types.push_back(subcolumn->data.get_least_common_type());
+        names.push_back(subcolumn->path.get_path());
+    }
+    block.erase(pos);
+    for (size_t i = 0; i < subcolumns.size(); ++i) {
+        // block may already contain this column, e.g. key columns; we should 
ignore it
+        // or replace it with the matching subcolumn of the object
+        if (block.has(names[i])) {
+            if (replace_if_duplicated) {
+                auto& column_type_name = block.get_by_name(names[i]);
+                column_type_name.column = subcolumns[i];
+                column_type_name.type = types[i];
+            }
+            continue;
+        }
+        block.insert(ColumnWithTypeAndName {subcolumns[i], types[i], 
names[i]});
+    }
+
+    // fill default value
+    for (auto& [column, _1, _2] : block.get_columns_with_type_and_name()) {
+        if (column->size() < num_rows) {
+            column->assume_mutable()->insert_many_defaults(num_rows - 
column->size());
+        }
+    }
+}
+
+Status flatten_object(Block& block, bool replace_if_duplicated) {
+    auto object_pos =
+            std::find_if(block.begin(), block.end(), [](const 
ColumnWithTypeAndName& column) {
+                return column.type->get_type_id() == TypeIndex::VARIANT;
+            });
+    if (object_pos != block.end()) {
+        flatten_object(block, object_pos - block.begin(), 
replace_if_duplicated);
+    }
+    return Status::OK();
+}
+
+bool is_conversion_required_between_integers(const IDataType& lhs, const 
IDataType& rhs) {
+    WhichDataType which_lhs(lhs);
+    WhichDataType which_rhs(rhs);
+    bool is_native_int = which_lhs.is_native_int() && 
which_rhs.is_native_int();
+    bool is_native_uint = which_lhs.is_native_uint() && 
which_rhs.is_native_uint();
+    return (is_native_int || is_native_uint) &&
+           lhs.get_size_of_value_in_memory() <= 
rhs.get_size_of_value_in_memory();
+}
+
+bool is_conversion_required_between_integers(FieldType lhs, FieldType rhs) {
+    // We only support signed integers for semi-structure data at present
+    // TODO add unsigned integers
+    if (lhs == OLAP_FIELD_TYPE_BIGINT) {
+        return !(rhs == OLAP_FIELD_TYPE_TINYINT || rhs == 
OLAP_FIELD_TYPE_SMALLINT ||
+                 rhs == OLAP_FIELD_TYPE_INT || rhs == OLAP_FIELD_TYPE_BIGINT);
+    }
+    if (lhs == OLAP_FIELD_TYPE_INT) {
+        return !(rhs == OLAP_FIELD_TYPE_TINYINT || rhs == 
OLAP_FIELD_TYPE_SMALLINT ||
+                 rhs == OLAP_FIELD_TYPE_INT);
+    }
+    if (lhs == OLAP_FIELD_TYPE_SMALLINT) {
+        return !(rhs == OLAP_FIELD_TYPE_TINYINT || rhs == 
OLAP_FIELD_TYPE_SMALLINT);
+    }
+    if (lhs == OLAP_FIELD_TYPE_TINYINT) {
+        return !(rhs == OLAP_FIELD_TYPE_TINYINT);
+    }
+    return true;
+}
+
+Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, 
ColumnPtr* result) {
+    ColumnsWithTypeAndName arguments {arg,
+                                      
{type->create_column_const_with_default_value(1), type, ""}};
+    auto function = SimpleFunctionFactory::instance().get_function("CAST", 
arguments, type);
+    Block tmp_block {arguments};
+    // the 0 position is input argument, the 1 position is to type argument, 
the 2 position is result argument
+    vectorized::ColumnNumbers argnum;
+    argnum.emplace_back(0);
+    argnum.emplace_back(1);
+    size_t result_column = tmp_block.columns();
+    tmp_block.insert({nullptr, type, arg.name});
+    RETURN_IF_ERROR(
+            function->execute(nullptr, tmp_block, argnum, result_column, 
arg.column->size()));
+    *result = std::move(tmp_block.get_by_position(result_column).column);
+    return Status::OK();
+}
+
+static void get_column_def(const vectorized::DataTypePtr& data_type, const 
std::string& name,
+                           TColumnDef* column) {
+    if (!name.empty()) {
+        column->columnDesc.__set_columnName(name);
+    }
+    if (data_type->is_nullable()) {
+        const auto& real_type = static_cast<const 
DataTypeNullable&>(*data_type);
+        column->columnDesc.__set_isAllowNull(true);
+        get_column_def(real_type.get_nested_type(), "", column);
+        return;
+    }
+    
column->columnDesc.__set_columnType(to_thrift(get_primitive_type(data_type->get_type_id())));
+    if (data_type->get_type_id() == TypeIndex::Array) {
+        TColumnDef child;
+        column->columnDesc.__set_children({});
+        get_column_def(assert_cast<const 
DataTypeArray*>(data_type.get())->get_nested_type(), "",
+                       &child);
+        column->columnDesc.columnLength =
+                
TabletColumn::get_field_length_by_type(column->columnDesc.columnType, 0);
+        column->columnDesc.children.push_back(child.columnDesc);
+        return;
+    }
+    if (data_type->get_type_id() == TypeIndex::Tuple) {
+        // TODO
+        // auto tuple_type = assert_cast<const 
DataTypeTuple*>(data_type.get());
+        // DCHECK_EQ(tuple_type->get_elements().size(), 
tuple_type->get_element_names().size());
+        // for (size_t i = 0; i < tuple_type->get_elements().size(); ++i) {
+        //     TColumnDef child;
+        //     get_column_def(tuple_type->get_element(i), 
tuple_type->get_element_names()[i], &child);
+        //     column->columnDesc.children.push_back(child.columnDesc);
+        // }
+        // return;
+    }
+    if (data_type->get_type_id() == TypeIndex::String) {
+        return;
+    }
+    if (WhichDataType(*data_type).is_simple()) {
+        
column->columnDesc.__set_columnLength(data_type->get_size_of_value_in_memory());
+        return;
+    }
+    return;
+}

Review Comment:
   warning: redundant return statement at the end of a function with a void 
return type [readability-redundant-control-flow]
   
   ```suggestion
       }
   ```
   



##########
be/src/vec/core/block.cpp:
##########
@@ -977,4 +998,33 @@ void MutableBlock::clear_column_data() noexcept {
     }
 }
 
+void MutableBlock::initialize_index_by_name() {
+    for (size_t i = 0, size = _names.size(); i < size; ++i) {
+        index_by_name[_names[i]] = i;
+    }
+}
+
+bool MutableBlock::has(const std::string& name) const {
+    return index_by_name.end() != index_by_name.find(name);
+}
+
+size_t MutableBlock::get_position_by_name(const std::string& name) const {
+    auto it = index_by_name.find(name);
+    if (index_by_name.end() == it) {
+        LOG(FATAL) << fmt::format("Not found column {} in block. There are 
only columns: {}", name,
+                                  dump_names());
+    }
+
+    return it->second;
+}
+
+std::string MutableBlock::dump_names() const {
+    std::stringstream out;
+    for (auto it = _names.begin(); it != _names.end(); ++it) {
+        if (it != _names.begin()) out << ", ";

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (it != _names.begin()) { out << ", ";
   }
   ```
   



##########
be/src/vec/core/accurate_comparison.h:
##########
@@ -456,22 +456,36 @@ inline bool_if_safe_conversion<A, B> greaterOrEqualsOp(A 
a, B b) {
 }
 
 /// Converts numeric to an equal numeric of other type.
-template <typename From, typename To>
+/// When `strict` is `true`, check that the result exactly equals the input; 
otherwise only check for overflow
+template <typename From, typename To, bool strict = true>
 inline bool convertNumeric(From value, To& result) {
     /// If the type is actually the same it's not necessary to do any checks.
     if constexpr (std::is_same_v<From, To>) {
         result = value;
         return true;
     }
-
-    /// Note that NaNs doesn't compare equal to anything, but they are still 
in range of any Float type.
-    if (is_nan(value) && std::is_floating_point_v<To>) {
-        result = value;
-        return true;
+    if constexpr (std::is_floating_point_v<From> && 
std::is_floating_point_v<To>) {
+        /// Note that NaNs doesn't compare equal to anything, but they are 
still in range of any Float type.
+        if (is_nan(value)) {
+            result = value;
+            return true;
+        }
+        if (value == std::numeric_limits<From>::infinity()) {
+            result = std::numeric_limits<To>::infinity();
+            return true;
+        }
+        if (value == -std::numeric_limits<From>::infinity()) {
+            result = -std::numeric_limits<To>::infinity();
+            return true;
+        }
+    }
+    if (greaterOp(value, std::numeric_limits<To>::max()) ||
+        lessOp(value, std::numeric_limits<To>::lowest())) {
+        return false;
     }
-
     result = static_cast<To>(value);
-    return equalsOp(value, result);
+    if constexpr (strict) return equalsOp(value, result);

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
       if constexpr (strict) { return equalsOp(value, result);
   }
   ```
   



##########
be/src/vec/data_types/get_least_supertype.cpp:
##########
@@ -50,12 +50,152 @@
 }
 } // namespace
 
-DataTypePtr get_least_supertype(const DataTypes& types) {
+Status get_numeric_type(const TypeIndexSet& types, DataTypePtr* type) {
+    bool all_numbers = true;
+
+    size_t max_bits_of_signed_integer = 0;
+    size_t max_bits_of_unsigned_integer = 0;
+    size_t max_mantissa_bits_of_floating = 0;
+
+    auto maximize = [](size_t& what, size_t value) {
+        if (value > what) what = value;
+    };
+
+    for (const auto& type : types) {
+        if (type == TypeIndex::UInt8)
+            maximize(max_bits_of_unsigned_integer, 8);
+        else if (type == TypeIndex::UInt16)

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           else if (type == TypeIndex::UInt16) {
   ```
   
   be/src/vec/data_types/get_least_supertype.cpp:68:
   ```diff
   -         else if (type == TypeIndex::UInt32)
   +         } else if (type == TypeIndex::UInt32)
   ```
   



##########
be/src/vec/data_types/get_least_supertype.cpp:
##########
@@ -50,12 +50,152 @@ String get_exception_message_prefix(const DataTypes& 
types) {
 }
 } // namespace
 
-DataTypePtr get_least_supertype(const DataTypes& types) {
+Status get_numeric_type(const TypeIndexSet& types, DataTypePtr* type) {
+    bool all_numbers = true;
+
+    size_t max_bits_of_signed_integer = 0;
+    size_t max_bits_of_unsigned_integer = 0;
+    size_t max_mantissa_bits_of_floating = 0;
+
+    auto maximize = [](size_t& what, size_t value) {
+        if (value > what) what = value;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (value > what) { what = value;
   }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;
+
+        if (!root) root = std::make_shared<Node>(Node::TUPLE);
+
+        Node* current_node = root.get();
+        for (size_t i = 0; i < parts.size() - 1; ++i) {
+            assert(current_node->kind != Node::SCALAR);
+
+            auto it = current_node->children.find(
+                    StringRef {parts[i].key.data(), parts[i].key.size()});
+            if (it != current_node->children.end()) {
+                current_node = it->get_second().get();
+                node_creator(current_node->kind, true);
+
+                if (current_node->is_nested() != parts[i].is_nested) return 
false;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
                   if (current_node->is_nested() != parts[i].is_nested) { 
return false;
   }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
               if (kind == Node::SCALAR) { return std::make_shared<Node>(kind, 
leaf_data, path);
   }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;
+
+        if (!root) root = std::make_shared<Node>(Node::TUPLE);

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           if (!root) { root = std::make_shared<Node>(Node::TUPLE);
   }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;
+
+        if (!root) root = std::make_shared<Node>(Node::TUPLE);
+
+        Node* current_node = root.get();
+        for (size_t i = 0; i < parts.size() - 1; ++i) {
+            assert(current_node->kind != Node::SCALAR);
+
+            auto it = current_node->children.find(
+                    StringRef {parts[i].key.data(), parts[i].key.size()});
+            if (it != current_node->children.end()) {
+                current_node = it->get_second().get();
+                node_creator(current_node->kind, true);
+
+                if (current_node->is_nested() != parts[i].is_nested) return 
false;
+            } else {
+                auto next_kind = parts[i].is_nested ? Node::NESTED : 
Node::TUPLE;
+                auto next_node = node_creator(next_kind, false);
+                current_node->add_child(String(parts[i].key), next_node);
+                current_node = next_node.get();
+            }
+        }
+
+        auto it = current_node->children.find(
+                StringRef {parts.back().key.data(), parts.back().key.size()});
+        if (it != current_node->children.end()) return false;
+
+        auto next_node = node_creator(Node::SCALAR, false);
+        current_node->add_child(String(parts.back().key), next_node);
+        leaves.push_back(std::move(next_node));
+
+        return true;
+    }
+
+    /// Find node that matches the path the best.
+    const Node* find_best_match(const PathInData& path) const { return 
find_impl(path, false); }
+
+    /// Find node that matches the path exactly.
+    const Node* find_exact(const PathInData& path) const { return 
find_impl(path, true); }
+
+    /// Find leaf by path.
+    const Node* find_leaf(const PathInData& path) const {
+        const auto* candidate = find_exact(path);
+        if (!candidate || !candidate->is_scalar()) return nullptr;
+        return candidate;
+    }
+
+    using NodePredicate = std::function<bool(const Node&)>;
+
+    /// Finds leaf that satisfies the predicate.
+    const Node* find_leaf(const NodePredicate& predicate) {
+        return find_leaf(root.get(), predicate);
+    }
+
+    static const Node* find_leaf(const Node* node, const NodePredicate& 
predicate) {
+        if (!node) return nullptr;
+
+        if (node->is_scalar()) return predicate(*node) ? node : nullptr;
+
+        for (auto it = node->children.begin(); it != node->children.end(); 
++it) {
+            auto child = it->get_second();
+            if (const auto* leaf = find_leaf(child.get(), predicate)) return 
leaf;
+        }
+        return nullptr;
+    }
+
+    /// Find first parent node that satisfies the predicate.
+    static const Node* find_parent(const Node* node, const NodePredicate& 
predicate) {
+        while (node && !predicate(*node)) node = node->parent;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           while (node && !predicate(*node)) { node = node->parent;
   }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;
+
+        if (!root) root = std::make_shared<Node>(Node::TUPLE);
+
+        Node* current_node = root.get();
+        for (size_t i = 0; i < parts.size() - 1; ++i) {
+            assert(current_node->kind != Node::SCALAR);
+
+            auto it = current_node->children.find(
+                    StringRef {parts[i].key.data(), parts[i].key.size()});
+            if (it != current_node->children.end()) {
+                current_node = it->get_second().get();
+                node_creator(current_node->kind, true);
+
+                if (current_node->is_nested() != parts[i].is_nested) return 
false;
+            } else {
+                auto next_kind = parts[i].is_nested ? Node::NESTED : 
Node::TUPLE;
+                auto next_node = node_creator(next_kind, false);
+                current_node->add_child(String(parts[i].key), next_node);
+                current_node = next_node.get();
+            }
+        }
+
+        auto it = current_node->children.find(
+                StringRef {parts.back().key.data(), parts.back().key.size()});
+        if (it != current_node->children.end()) return false;
+
+        auto next_node = node_creator(Node::SCALAR, false);
+        current_node->add_child(String(parts.back().key), next_node);
+        leaves.push_back(std::move(next_node));
+
+        return true;
+    }
+
+    /// Find node that matches the path the best.
+    const Node* find_best_match(const PathInData& path) const { return 
find_impl(path, false); }
+
+    /// Find node that matches the path exactly.
+    const Node* find_exact(const PathInData& path) const { return 
find_impl(path, true); }
+
+    /// Find leaf by path.
+    const Node* find_leaf(const PathInData& path) const {
+        const auto* candidate = find_exact(path);
+        if (!candidate || !candidate->is_scalar()) return nullptr;
+        return candidate;
+    }
+
+    using NodePredicate = std::function<bool(const Node&)>;
+
+    /// Finds leaf that satisfies the predicate.
+    const Node* find_leaf(const NodePredicate& predicate) {
+        return find_leaf(root.get(), predicate);
+    }
+
+    static const Node* find_leaf(const Node* node, const NodePredicate& 
predicate) {
+        if (!node) return nullptr;
+
+        if (node->is_scalar()) return predicate(*node) ? node : nullptr;
+
+        for (auto it = node->children.begin(); it != node->children.end(); 
++it) {
+            auto child = it->get_second();
+            if (const auto* leaf = find_leaf(child.get(), predicate)) return 
leaf;

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
               if (const auto* leaf = find_leaf(child.get(), predicate)) {
                   return leaf;
               }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;
+
+        if (!root) root = std::make_shared<Node>(Node::TUPLE);
+
+        Node* current_node = root.get();
+        for (size_t i = 0; i < parts.size() - 1; ++i) {
+            assert(current_node->kind != Node::SCALAR);
+
+            auto it = current_node->children.find(
+                    StringRef {parts[i].key.data(), parts[i].key.size()});
+            if (it != current_node->children.end()) {
+                current_node = it->get_second().get();
+                node_creator(current_node->kind, true);
+
+                if (current_node->is_nested() != parts[i].is_nested) return 
false;
+            } else {
+                auto next_kind = parts[i].is_nested ? Node::NESTED : 
Node::TUPLE;
+                auto next_node = node_creator(next_kind, false);
+                current_node->add_child(String(parts[i].key), next_node);
+                current_node = next_node.get();
+            }
+        }
+
+        auto it = current_node->children.find(
+                StringRef {parts.back().key.data(), parts.back().key.size()});
+        if (it != current_node->children.end()) return false;
+
+        auto next_node = node_creator(Node::SCALAR, false);
+        current_node->add_child(String(parts.back().key), next_node);
+        leaves.push_back(std::move(next_node));
+
+        return true;
+    }
+
+    /// Find node that matches the path the best.
+    const Node* find_best_match(const PathInData& path) const { return 
find_impl(path, false); }
+
+    /// Find node that matches the path exactly.
+    const Node* find_exact(const PathInData& path) const { return 
find_impl(path, true); }
+
+    /// Find leaf by path.
+    const Node* find_leaf(const PathInData& path) const {
+        const auto* candidate = find_exact(path);
+        if (!candidate || !candidate->is_scalar()) return nullptr;
+        return candidate;
+    }
+
+    using NodePredicate = std::function<bool(const Node&)>;
+
+    /// Finds leaf that satisfies the predicate.
+    const Node* find_leaf(const NodePredicate& predicate) {
+        return find_leaf(root.get(), predicate);
+    }
+
+    static const Node* find_leaf(const Node* node, const NodePredicate& 
predicate) {
+        if (!node) return nullptr;
+
+        if (node->is_scalar()) return predicate(*node) ? node : nullptr;
+
+        for (auto it = node->children.begin(); it != node->children.end(); 
++it) {
+            auto child = it->get_second();
+            if (const auto* leaf = find_leaf(child.get(), predicate)) return 
leaf;
+        }
+        return nullptr;
+    }
+
+    /// Find first parent node that satisfies the predicate.
+    static const Node* find_parent(const Node* node, const NodePredicate& 
predicate) {
+        while (node && !predicate(*node)) node = node->parent;
+        return node;
+    }
+
+    bool empty() const { return root == nullptr; }
+    size_t size() const { return leaves.size(); }
+
+    using Nodes = std::vector<NodePtr>;
+
+    const Nodes& get_leaves() const { return leaves; }
+    const Node* get_root() const { return root.get(); }
+
+    using iterator = typename Nodes::iterator;
+    using const_iterator = typename Nodes::const_iterator;
+
+    iterator begin() { return leaves.begin(); }
+    iterator end() { return leaves.end(); }
+
+    const_iterator begin() const { return leaves.begin(); }
+    const_iterator end() const { return leaves.end(); }
+
+private:
+    const Node* find_impl(const PathInData& path, bool find_exact) const {
+        if (!root) return nullptr;
+
+        const auto& parts = path.get_parts();
+        const Node* current_node = root.get();
+
+        for (const auto& part : parts) {
+            auto it = current_node->children.find(StringRef {part.key.data(), 
part.key.size()});
+            if (it == current_node->children.end()) return find_exact ? 
nullptr : current_node;

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
               if (it == current_node->children.end()) {
                   return find_exact ? nullptr : current_node;
               }
   ```
   



##########
be/src/vec/columns/subcolumn_tree.h:
##########
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/SubcolumnsTree.h
+// and modified by Doris
+
+#pragma once
+#include <vec/columns/column.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type.h>
+#include <vec/json/path_in_data.h>
+namespace doris::vectorized {
+/// Tree that represents paths in document
+/// with additional data in nodes.
+
+template <typename NodeData>
+class SubcolumnsTree {
+public:
+    struct Node {
+        enum Kind {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData& data_, const PathInData& path_)
+                : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+        const Node* parent = nullptr;
+
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, 
StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool is_nested() const { return kind == NESTED; }
+        bool is_scalar() const { return kind == SCALAR; }
+
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+            next_node->parent = this;
+            StringRef key_ref {strings_pool.insert(key.data(), key.length()), 
key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    /// Add a leaf without any data in other nodes.
+    bool add(const PathInData& path, const NodeData& leaf_data) {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr {
+            if (exists) return nullptr;
+
+            if (kind == Node::SCALAR) return std::make_shared<Node>(kind, 
leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    bool add(const PathInData& path, const NodeCreator& node_creator) {
+        const auto& parts = path.get_parts();
+
+        if (parts.empty()) return false;
+
+        if (!root) root = std::make_shared<Node>(Node::TUPLE);
+
+        Node* current_node = root.get();
+        for (size_t i = 0; i < parts.size() - 1; ++i) {
+            assert(current_node->kind != Node::SCALAR);
+
+            auto it = current_node->children.find(
+                    StringRef {parts[i].key.data(), parts[i].key.size()});
+            if (it != current_node->children.end()) {
+                current_node = it->get_second().get();
+                node_creator(current_node->kind, true);
+
+                if (current_node->is_nested() != parts[i].is_nested) return 
false;
+            } else {
+                auto next_kind = parts[i].is_nested ? Node::NESTED : 
Node::TUPLE;
+                auto next_node = node_creator(next_kind, false);
+                current_node->add_child(String(parts[i].key), next_node);
+                current_node = next_node.get();
+            }
+        }
+
+        auto it = current_node->children.find(
+                StringRef {parts.back().key.data(), parts.back().key.size()});
+        if (it != current_node->children.end()) return false;
+
+        auto next_node = node_creator(Node::SCALAR, false);
+        current_node->add_child(String(parts.back().key), next_node);
+        leaves.push_back(std::move(next_node));
+
+        return true;
+    }
+
+    /// Find node that matches the path the best.
+    const Node* find_best_match(const PathInData& path) const { return 
find_impl(path, false); }
+
+    /// Find node that matches the path exactly.
+    const Node* find_exact(const PathInData& path) const { return 
find_impl(path, true); }
+
+    /// Find leaf by path.
+    const Node* find_leaf(const PathInData& path) const {
+        const auto* candidate = find_exact(path);
+        if (!candidate || !candidate->is_scalar()) return nullptr;
+        return candidate;
+    }
+
+    using NodePredicate = std::function<bool(const Node&)>;
+
+    /// Finds leaf that satisfies the predicate.
+    const Node* find_leaf(const NodePredicate& predicate) {
+        return find_leaf(root.get(), predicate);
+    }
+
+    static const Node* find_leaf(const Node* node, const NodePredicate& 
predicate) {
+        if (!node) return nullptr;
+
+        if (node->is_scalar()) return predicate(*node) ? node : nullptr;
+
+        for (auto it = node->children.begin(); it != node->children.end(); 
++it) {
+            auto child = it->get_second();
+            if (const auto* leaf = find_leaf(child.get(), predicate)) return 
leaf;
+        }
+        return nullptr;
+    }
+
+    /// Find first parent node that satisfies the predicate.
+    static const Node* find_parent(const Node* node, const NodePredicate& 
predicate) {
+        while (node && !predicate(*node)) node = node->parent;
+        return node;
+    }
+
+    bool empty() const { return root == nullptr; }
+    size_t size() const { return leaves.size(); }
+
+    using Nodes = std::vector<NodePtr>;
+
+    const Nodes& get_leaves() const { return leaves; }
+    const Node* get_root() const { return root.get(); }
+
+    using iterator = typename Nodes::iterator;
+    using const_iterator = typename Nodes::const_iterator;
+
+    iterator begin() { return leaves.begin(); }
+    iterator end() { return leaves.end(); }
+
+    const_iterator begin() const { return leaves.begin(); }
+    const_iterator end() const { return leaves.end(); }
+
+private:
+    const Node* find_impl(const PathInData& path, bool find_exact) const {
+        if (!root) return nullptr;

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
           if (!root) {
               return nullptr;
           }
   ```
   



##########
be/src/vec/common/schema_util.cpp:
##########
@@ -0,0 +1,480 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <vec/columns/column_array.h>
+#include <vec/columns/column_object.h>
+#include <vec/common/schema_util.h>
+#include <vec/core/field.h>
+#include <vec/data_types/data_type_array.h>
+#include <vec/data_types/data_type_object.h>
+#include <vec/functions/simple_function_factory.h>
+#include <vec/json/parse2column.h>
+
+#include <vec/data_types/data_type_factory.hpp>
+
+#include "gen_cpp/FrontendService.h"
+#include "gen_cpp/HeartbeatService_types.h"
+#include "olap/rowset/rowset_writer_context.h"
+#include "runtime/client_cache.h"
+#include "runtime/exec_env.h"
+#include "util/thrift_rpc_helper.h"
+
+namespace doris::vectorized::schema_util {
+
+size_t get_number_of_dimensions(const IDataType& type) {
+    if (const auto* type_array = typeid_cast<const DataTypeArray*>(&type))
+        return type_array->get_number_of_dimensions();
+    return 0;
+}
+size_t get_number_of_dimensions(const IColumn& column) {
+    if (const auto* column_array = check_and_get_column<ColumnArray>(column))
+        return column_array->get_number_of_dimensions();

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
       if (const auto* column_array = check_and_get_column<ColumnArray>(column)) {
           return column_array->get_number_of_dimensions();
       }
   ```
   



##########
be/src/vec/common/schema_util.cpp:
##########
@@ -0,0 +1,480 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <vec/columns/column_array.h>
+#include <vec/columns/column_object.h>
+#include <vec/common/schema_util.h>
+#include <vec/core/field.h>
+#include <vec/data_types/data_type_array.h>
+#include <vec/data_types/data_type_object.h>
+#include <vec/functions/simple_function_factory.h>
+#include <vec/json/parse2column.h>
+
+#include <vec/data_types/data_type_factory.hpp>
+
+#include "gen_cpp/FrontendService.h"
+#include "gen_cpp/HeartbeatService_types.h"
+#include "olap/rowset/rowset_writer_context.h"
+#include "runtime/client_cache.h"
+#include "runtime/exec_env.h"
+#include "util/thrift_rpc_helper.h"
+
+namespace doris::vectorized::schema_util {
+
+size_t get_number_of_dimensions(const IDataType& type) {
+    if (const auto* type_array = typeid_cast<const DataTypeArray*>(&type))
+        return type_array->get_number_of_dimensions();

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
       if (const auto* type_array = typeid_cast<const DataTypeArray*>(&type)) {
           return type_array->get_number_of_dimensions();
       }
   ```
   



##########
be/src/vec/data_types/get_least_supertype.cpp:
##########
@@ -50,12 +50,152 @@
 }
 } // namespace
 
-DataTypePtr get_least_supertype(const DataTypes& types) {
+Status get_numeric_type(const TypeIndexSet& types, DataTypePtr* type) {
+    bool all_numbers = true;
+
+    size_t max_bits_of_signed_integer = 0;
+    size_t max_bits_of_unsigned_integer = 0;
+    size_t max_mantissa_bits_of_floating = 0;
+
+    auto maximize = [](size_t& what, size_t value) {
+        if (value > what) what = value;
+    };
+
+    for (const auto& type : types) {
+        if (type == TypeIndex::UInt8)

Review Comment:
   warning: statement should be inside braces [readability-braces-around-statements]
   
   ```suggestion
           if (type == TypeIndex::UInt8) {
   ```
   
   be/src/vec/data_types/get_least_supertype.cpp:66:
   ```diff
   -         else if (type == TypeIndex::UInt16)
   +         } else if (type == TypeIndex::UInt16)
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to