github-actions[bot] commented on code in PR #26572:
URL: https://github.com/apache/doris/pull/26572#discussion_r1386069235


##########
be/src/olap/rowset/beta_rowset_writer.cpp:
##########
@@ -544,6 +551,24 @@ bool BetaRowsetWriter::_is_segment_overlapping(
     return false;
 }
 
+// update tablet schema when meet variant columns, before commit_txn
+// Eg. rowset schema:       A(int),    B(float),  C(int), D(int)
+// _tablet->tablet_schema:  A(bigint), B(double)
+//  => update_schema:       A(bigint), B(double), C(int), D(int)
+void BetaRowsetWriter::update_rowset_schema(TabletSchemaSPtr flush_schema) {

Review Comment:
   warning: method 'update_rowset_schema' can be made static 
[readability-convert-member-functions-to-static]
   
   be/src/olap/rowset/beta_rowset_writer.h:173:
   ```diff
   -     void update_rowset_schema(TabletSchemaSPtr flush_schema);
   +     static void update_rowset_schema(TabletSchemaSPtr flush_schema);
   ```
   



##########
be/src/exprs/json_functions.cpp:
##########
@@ -316,4 +318,33 @@ Status 
JsonFunctions::extract_from_object(simdjson::ondemand::object& obj,
     return Status::OK();
 }
 
+std::string JsonFunctions::print_json_value(const rapidjson::Value& value) {
+    rapidjson::StringBuffer buffer;
+    buffer.Clear();
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    value.Accept(writer);
+    return std::string(buffer.GetString());
+}
+
+void JsonFunctions::merge_objects(rapidjson::Value& dst_object, 
rapidjson::Value& src_object,

Review Comment:
   warning: method 'merge_objects' can be made static 
[readability-convert-member-functions-to-static]
   
   ```suggestion
   static void JsonFunctions::merge_objects(rapidjson::Value& dst_object, 
rapidjson::Value& src_object,
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return 
clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { 
return 0; }

Review Comment:
   warning: all parameters should be named in a function 
[readability-named-parameter]
   
   ```suggestion
       int compare_at(size_t /*n*/, size_t /*m*/, const IColumn& /*rhs*/, int 
/*nan_direction_hint*/) const override { return 0; }
   ```
   



##########
be/src/olap/base_tablet.cpp:
##########
@@ -65,4 +66,14 @@ void BaseTablet::update_max_version_schema(const 
TabletSchemaSPtr& tablet_schema
     }
 }
 
+void BaseTablet::update_by_least_common_schema(const TabletSchemaSPtr& 
update_schema) {

Review Comment:
   warning: method 'update_by_least_common_schema' can be made static 
[readability-convert-member-functions-to-static]
   
   be/src/olap/base_tablet.h:67:
   ```diff
   -     void update_by_least_common_schema(const TabletSchemaSPtr& 
update_schema);
   +     static void update_by_least_common_schema(const TabletSchemaSPtr& 
update_schema);
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:

Review Comment:
   warning: redundant access specifier has the same accessibility as the 
previous access specifier [readability-redundant-access-specifiers]
   
   ```suggestion
   
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/vec/columns/column_dummy.h:33:** previously declared here
   ```cpp
   public:
   ^
   ```
   
   </details>
   



##########
be/src/vec/columns/column_array.cpp:
##########
@@ -481,6 +481,10 @@ void ColumnArray::insert_range_from(const IColumn& src, 
size_t start, size_t len
     }
 }
 
+double ColumnArray::get_ratio_of_default_rows(double sample_ratio) const {

Review Comment:
   warning: method 'get_ratio_of_default_rows' can be made static 
[readability-convert-member-functions-to-static]
   
   ```suggestion
   double ColumnArray::get_ratio_of_default_rows(double sample_ratio) {
   ```
   
   be/src/vec/columns/column_array.h:267:
   ```diff
   -     double get_ratio_of_default_rows(double sample_ratio) const override;
   +     static double get_ratio_of_default_rows(double sample_ratio) override;
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return 
clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { 
return 0; }
+
+    [[noreturn]] Field operator[](size_t) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void get(size_t, Field&) const override {

Review Comment:
   warning: all parameters should be named in a function 
[readability-named-parameter]
   
   ```suggestion
       void get(size_t /*n*/, Field& /*res*/) const override {
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return 
clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { 
return 0; }
+
+    [[noreturn]] Field operator[](size_t) const override {

Review Comment:
   warning: all parameters should be named in a function 
[readability-named-parameter]
   
   ```suggestion
       [[noreturn]] Field operator[](size_t /*n*/) const override {
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return 
clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { 
return 0; }
+
+    [[noreturn]] Field operator[](size_t) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void get(size_t, Field&) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void insert(const Field&) override {
+        LOG(FATAL) << "Cannot insert element into " << get_name();
+    }
+
+    StringRef get_data_at(size_t) const override { return {}; }

Review Comment:
   warning: all parameters should be named in a function 
[readability-named-parameter]
   
   ```suggestion
       StringRef get_data_at(size_t /*n*/) const override { return {}; }
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return 
clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { 
return 0; }
+
+    [[noreturn]] Field operator[](size_t) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void get(size_t, Field&) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void insert(const Field&) override {

Review Comment:
   warning: all parameters should be named in a function 
[readability-named-parameter]
   
   ```suggestion
       void insert(const Field& /*x*/) override {
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return 
clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { 
return 0; }
+
+    [[noreturn]] Field operator[](size_t) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void get(size_t, Field&) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void insert(const Field&) override {
+        LOG(FATAL) << "Cannot insert element into " << get_name();
+    }
+
+    StringRef get_data_at(size_t) const override { return {}; }
+
+    void insert_data(const char*, size_t) override { ++s; }
+
+    StringRef serialize_value_into_arena(size_t /*n*/, Arena& arena,
+                                         char const*& begin) const override {
+        return {arena.alloc_continue(0, begin), 0};
+    }
+
+    const char* deserialize_and_insert_from_arena(const char* pos) override {
+        ++s;
+        return pos;
+    }
+
+    void insert_from(const IColumn&, size_t) override { ++s; }

Review Comment:
   warning: all parameters should be named in a function 
[readability-named-parameter]
   
   ```suggestion
       void insert_from(const IColumn& /*src*/, size_t /*n*/) override { ++s; }
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return 
clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { 
return 0; }
+
+    [[noreturn]] Field operator[](size_t) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void get(size_t, Field&) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void insert(const Field&) override {
+        LOG(FATAL) << "Cannot insert element into " << get_name();
+    }
+
+    StringRef get_data_at(size_t) const override { return {}; }
+
+    void insert_data(const char*, size_t) override { ++s; }

Review Comment:
   warning: all parameters should be named in a function 
[readability-named-parameter]
   
   ```suggestion
       void insert_data(const char* /*pos*/, size_t /*length*/) override { ++s; 
}
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return 
clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { 
return 0; }
+
+    [[noreturn]] Field operator[](size_t) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void get(size_t, Field&) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void insert(const Field&) override {
+        LOG(FATAL) << "Cannot insert element into " << get_name();
+    }
+
+    StringRef get_data_at(size_t) const override { return {}; }
+
+    void insert_data(const char*, size_t) override { ++s; }
+
+    StringRef serialize_value_into_arena(size_t /*n*/, Arena& arena,
+                                         char const*& begin) const override {
+        return {arena.alloc_continue(0, begin), 0};
+    }
+
+    const char* deserialize_and_insert_from_arena(const char* pos) override {
+        ++s;
+        return pos;
+    }
+
+    void insert_from(const IColumn&, size_t) override { ++s; }
+
+    void insert_range_from(const IColumn& /*src*/, size_t /*start*/, size_t 
length) override {
+        s += length;
+    }
+
+    void insert_indices_from(const IColumn& src, const int* indices_begin,
+                             const int* indices_end) override {
+        s += (indices_end - indices_begin);
+    }
+
+    ColumnPtr filter(const Filter& filt, ssize_t /*result_size_hint*/) const 
override {
+        return clone_dummy(count_bytes_in_filter(filt));
+    }
+
+    size_t filter(const Filter& filter) override {
+        const auto result_size = count_bytes_in_filter(filter);
+        s = result_size;
+        return result_size;
+    }
+
+    ColumnPtr permute(const Permutation& perm, size_t limit) const override {
+        if (s != perm.size()) {
+            LOG(FATAL) << "Size of permutation doesn't match size of column.";
+        }
+
+        return clone_dummy(limit ? std::min(s, limit) : s);
+    }
+
+    void get_permutation(bool /*reverse*/, size_t /*limit*/, int 
/*nan_direction_hint*/,
+                         Permutation& res) const override {
+        res.resize(s);
+        for (size_t i = 0; i < s; ++i) res[i] = i;
+    }
+
+    ColumnPtr replicate(const Offsets& offsets) const override {
+        column_match_offsets_size(s, offsets.size());
+
+        return clone_dummy(offsets.back());
+    }
+
+    void replicate(const uint32_t* indexs, size_t target_size, IColumn& 
column) const override {
+        LOG(FATAL) << "Not implemented";
+    }
+
+    MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) 
const override {
+        if (s != selector.size()) {
+            LOG(FATAL) << "Size of selector doesn't match size of column.";
+        }
+
+        std::vector<size_t> counts(num_columns);
+        for (auto idx : selector) ++counts[idx];
+
+        MutableColumns res(num_columns);
+        for (size_t i = 0; i < num_columns; ++i) res[i] = 
clone_resized(counts[i]);
+
+        return res;
+    }
+
+    void append_data_by_selector(MutableColumnPtr& res,
+                                 const IColumn::Selector& selector) const 
override {
+        size_t num_rows = size();
+
+        if (num_rows < selector.size()) {
+            LOG(FATAL) << fmt::format("Size of selector: {}, is larger than 
size of column:{}",
+                                      selector.size(), num_rows);
+        }
+
+        res->reserve(num_rows);
+
+        for (size_t i = 0; i < selector.size(); ++i) res->insert_from(*this, 
selector[i]);

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           for (size_t i = 0; i < selector.size(); ++i) { 
res->insert_from(*this, selector[i]);
   }
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -845,28 +905,318 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+static bool check_if_valid_column_name(const PathInData& path) {
+    static const std::regex 
COLUMN_NAME_REGEX("^[_a-zA-Z@0-9][.a-zA-Z0-9_+-/><?@#$%^&*]{0,255}$");
+    return std::regex_match(path.get_path(), COLUMN_NAME_REGEX);
+}
+
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to eliminate conflict 
at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with it's path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : "d" {"n" : null}, "e" : null}, "c" : 10 }
+// after compact -> {"a" : {"c"} : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        *output = type->to_string(*get_root(), row);
+        return true;
+    }
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        // TODO avoid copy
+        *output = std::string(buf.GetString(), buf.GetSize());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, BufferWritable& 
output) const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        type->to_string(*get_root(), row, output);
+        return true;
+    }
+    rapidjson::StringBuffer buf;
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        output.write(buf.GetString(), buf.GetLength());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_json_format(int row, 
rapidjson::StringBuffer* output,
+                                                    bool* is_null) const {
+    CHECK(is_finalized());
+    if (subcolumns.empty()) {
+        if (is_null != nullptr) {
+            *is_null = true;
+        } else {
+            rapidjson::Value root(rapidjson::kNullType);
+            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+            return root.Accept(writer);
+        }
+        return true;
+    }
+    CHECK(size() > row);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Value root(rapidjson::kNullType);
+    if (doc_structure == nullptr) {
+        doc_structure = std::make_shared<rapidjson::Document>();
+        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
+        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+    }
+    if (!doc_structure->IsNull()) {
+        root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+    }
+#ifndef NDEBUG
+    VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
+#endif
+    for (const auto& subcolumn : subcolumns) {
+        find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(), 
subcolumn->path,
+                                 
subcolumn->data.get_least_common_type_serde(), root,
+                                 doc_structure->GetAllocator(), row);
+    }
+    compact_null_values(root, doc_structure->GetAllocator());
+    if (root.IsNull() && is_null != nullptr) {
+        // Fast path
+        *is_null = true;
+    } else {
+        output->Clear();
+        rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+        return root.Accept(writer);
+    }
+    return true;
+}
+
+void ColumnObject::merge_sparse_to_root_column() {

Review Comment:
   warning: method 'merge_sparse_to_root_column' can be made const 
[readability-make-member-function-const]
   
   be/src/vec/columns/column_object.h:252:
   ```diff
   -     void merge_sparse_to_root_column();
   +     void merge_sparse_to_root_column() const;
   ```
   
   ```suggestion
   void ColumnObject::merge_sparse_to_root_column() const {
   ```
   



##########
be/src/vec/columns/column_impl.h:
##########
@@ -86,4 +86,33 @@ void 
IColumn::get_indices_of_non_default_rows_impl(IColumn::Offsets64& indices,
     }
 }
 
+template <typename Derived>
+double IColumn::get_ratio_of_default_rows_impl(double sample_ratio) const {
+    if (sample_ratio <= 0.0 || sample_ratio > 1.0) {
+        LOG(FATAL) << "Value of 'sample_ratio' must be in interval (0.0; 1.0], but got: "
+                   << sample_ratio;
+    }
+    static constexpr auto max_number_of_rows_for_full_search = 1000;
+    size_t num_rows = size();
+    size_t num_sampled_rows = std::min(static_cast<size_t>(num_rows * 
sample_ratio), num_rows);
+    size_t num_checked_rows = 0;
+    size_t res = 0;
+    if (num_sampled_rows == num_rows || num_rows <= 
max_number_of_rows_for_full_search) {
+        for (size_t i = 0; i < num_rows; ++i)
+            res += static_cast<const Derived&>(*this).is_default_at(i);

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           for (size_t i = 0; i < num_rows; ++i) {
               res += static_cast<const Derived&>(*this).is_default_at(i);
           }
   ```
   



##########
be/src/vec/columns/column_nothing.h:
##########
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/ColumnNothing.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_dummy.h"
+
+namespace doris::vectorized {
+
+class ColumnNothing final : public COWHelper<IColumnDummy, ColumnNothing> {
+private:
+    friend class COWHelper<IColumnDummy, ColumnNothing>;
+
+    ColumnNothing(size_t s_) { s = s_; }
+
+    ColumnNothing(const ColumnNothing&) = default;
+
+public:
+    const char* get_family_name() const override { return "Nothing"; }
+    MutableColumnPtr clone_dummy(size_t s_) const override { return 
ColumnNothing::create(s_); }
+
+    bool structure_equals(const IColumn& rhs) const override {

Review Comment:
   warning: method 'structure_equals' can be made static 
[readability-convert-member-functions-to-static]
   
   ```suggestion
       static bool structure_equals(const IColumn& rhs) override {
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -672,59 +740,52 @@
         if (*x == -1) {
             ColumnObject::insert_default();
         } else {
-            ColumnObject::try_insert_from(src, *x);
+            ColumnObject::insert_from(src, *x);
         }
     }
     finalize();
     return Status::OK();
 }
 
-void ColumnObject::try_insert_range_from(const IColumn& src, size_t start, 
size_t length) {
+FieldInfo ColumnObject::Subcolumn::get_subcolumn_field_info() const {

Review Comment:
   warning: method 'get_subcolumn_field_info' can be made static 
[readability-convert-member-functions-to-static]
   
   be/src/vec/columns/column_object.h:148:
   ```diff
   -         FieldInfo get_subcolumn_field_info() const;
   +         static FieldInfo get_subcolumn_field_info() ;
   ```
   
   ```suggestion
   FieldInfo ColumnObject::Subcolumn::get_subcolumn_field_info() {
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -845,28 +905,318 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+static bool check_if_valid_column_name(const PathInData& path) {
+    static const std::regex 
COLUMN_NAME_REGEX("^[_a-zA-Z@0-9][.a-zA-Z0-9_+-/><?@#$%^&*]{0,255}$");
+    return std::regex_match(path.get_path(), COLUMN_NAME_REGEX);
+}
+
+void ColumnObject::Subcolumn::wrapp_array_nullable() {

Review Comment:
   warning: method 'wrapp_array_nullable' can be made static 
[readability-convert-member-functions-to-static]
   
   be/src/vec/columns/column_object.h:190:
   ```diff
   -         void wrapp_array_nullable();
   +         static void wrapp_array_nullable();
   ```
   



##########
be/src/vec/columns/column_nullable.h:
##########
@@ -354,6 +355,10 @@ class ColumnNullable final : public COWHelper<IColumn, 
ColumnNullable> {
         return get_ptr();
     }
 
+    double get_ratio_of_default_rows(double sample_ratio) const override {

Review Comment:
   warning: method 'get_ratio_of_default_rows' can be made static 
[readability-convert-member-functions-to-static]
   
   ```suggestion
       static double get_ratio_of_default_rows(double sample_ratio) override {
   ```
   



##########
be/src/olap/rowset/vertical_beta_rowset_writer.h:
##########
@@ -49,6 +49,8 @@ class VerticalBetaRowsetWriter : public BetaRowsetWriter {
 
     int64_t num_rows() const override { return _total_key_group_rows; }
 
+    virtual const RowsetWriterContext& context() const override { LOG(FATAL) 
<< "Not implemented"; }

Review Comment:
   warning: 'virtual' is redundant since the function is already declared 
'override' [modernize-use-override]
   
   ```suggestion
       const RowsetWriterContext& context() const override { LOG(FATAL) << "Not implemented"; }
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -845,28 +905,318 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+static bool check_if_valid_column_name(const PathInData& path) {
+    static const std::regex 
COLUMN_NAME_REGEX("^[_a-zA-Z@0-9][.a-zA-Z0-9_+-/><?@#$%^&*]{0,255}$");
+    return std::regex_match(path.get_path(), COLUMN_NAME_REGEX);
+}
+
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to elimate conflict 
at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with it's path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : "d" {"n" : null}, "e" : null}, "c" : 10 }
+// after compact -> {"a" : {"c"} : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        *output = type->to_string(*get_root(), row);
+        return true;
+    }
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        // TODO avoid copy
+        *output = std::string(buf.GetString(), buf.GetSize());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, BufferWritable& 
output) const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        type->to_string(*get_root(), row, output);
+        return true;
+    }
+    rapidjson::StringBuffer buf;
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        output.write(buf.GetString(), buf.GetLength());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_json_format(int row, 
rapidjson::StringBuffer* output,
+                                                    bool* is_null) const {
+    CHECK(is_finalized());
+    if (subcolumns.empty()) {
+        if (is_null != nullptr) {
+            *is_null = true;
+        } else {
+            rapidjson::Value root(rapidjson::kNullType);
+            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+            return root.Accept(writer);
+        }
+        return true;
+    }
+    CHECK(size() > row);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Value root(rapidjson::kNullType);
+    if (doc_structure == nullptr) {
+        doc_structure = std::make_shared<rapidjson::Document>();
+        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
+        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+    }
+    if (!doc_structure->IsNull()) {
+        root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+    }
+#ifndef NDEBUG
+    VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
+#endif
+    for (const auto& subcolumn : subcolumns) {
+        find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(), 
subcolumn->path,
+                                 
subcolumn->data.get_least_common_type_serde(), root,
+                                 doc_structure->GetAllocator(), row);
+    }
+    compact_null_values(root, doc_structure->GetAllocator());
+    if (root.IsNull() && is_null != nullptr) {
+        // Fast path
+        *is_null = true;
+    } else {
+        output->Clear();
+        rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+        return root.Accept(writer);
+    }
+    return true;
+}
+
+void ColumnObject::merge_sparse_to_root_column() {
+    CHECK(is_finalized());
+    if (sparse_columns.empty()) {
+        return;
+    }
+    ColumnPtr src = 
subcolumns.get_mutable_root()->data.get_finalized_column_ptr();
+    MutableColumnPtr mresult = src->clone_empty();
+    const ColumnNullable* src_null = assert_cast<const 
ColumnNullable*>(src.get());
+    const ColumnString* src_column_ptr =
+            assert_cast<const ColumnString*>(&src_null->get_nested_column());
+    rapidjson::StringBuffer buffer;
+    doc_structure = std::make_shared<rapidjson::Document>();
+    rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
+    get_json_by_column_tree(*doc_structure, allocator, 
sparse_columns.get_root());
+
+#ifndef NDEBUG
+    VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
+#endif
+
+    ColumnNullable* result_column_nullable =
+            assert_cast<ColumnNullable*>(mresult->assume_mutable().get());
+    ColumnString* result_column_ptr =
+            
assert_cast<ColumnString*>(&result_column_nullable->get_nested_column());
+    result_column_nullable->reserve(num_rows);
+    // parse each row to jsonb
+    for (size_t i = 0; i < num_rows; ++i) {
+        // root is not null, store original value, eg. the root is scalar type 
like '[1]'
+        if (!src_null->empty() && !src_null->is_null_at(i)) {
+            result_column_ptr->insert_data(src_column_ptr->get_data_at(i).data,
+                                           
src_column_ptr->get_data_at(i).size);
+            result_column_nullable->get_null_map_data().push_back(0);
+            continue;
+        }
+
+        // parse and encode sparse columns
+        buffer.Clear();
+        rapidjson::Value root(rapidjson::kNullType);
+        if (!doc_structure->IsNull()) {
+            root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+        }
+        size_t null_count = 0;
+        for (const auto& subcolumn : sparse_columns) {
+            auto& column = subcolumn->data.get_finalized_column_ptr();
+            if (assert_cast<const ColumnNullable&>(*column).is_null_at(i)) {
+                ++null_count;
+                continue;
+            }
+            find_and_set_leave_value(column, subcolumn->path,
+                                     
subcolumn->data.get_least_common_type_serde(), root,
+                                     doc_structure->GetAllocator(), i);
+        }
+
+        // all null values, store null to sparse root
+        if (null_count == sparse_columns.size()) {
+            result_column_ptr->insert_default();
+            result_column_nullable->get_null_map_data().push_back(1);
+            continue;
+        }
+
+        // encode sparse columns into jsonb format
+        compact_null_values(root, doc_structure->GetAllocator());
+        // parse as jsonb value and put back to rootnode
+        // TODO, we could convert to jsonb directly from rapidjson::Value for 
better performance, instead of parsing
+        JsonbParser parser;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        bool res = parser.parse(buffer.GetString(), buffer.GetSize());
+        CHECK(res) << "buffer:" << std::string(buffer.GetString(), 
buffer.GetSize())
+                   << ", row_num:" << i;
+        
result_column_ptr->insert_data(parser.getWriter().getOutput()->getBuffer(),
+                                       
parser.getWriter().getOutput()->getSize());
+        result_column_nullable->get_null_map_data().push_back(0);
+    }
+
+    // assign merged column
+    subcolumns.get_mutable_root()->data.get_finalized_column_ptr() = 
mresult->get_ptr();
+}
+
+void ColumnObject::finalize(bool ignore_sparse) {

Review Comment:
   warning: method 'finalize' can be made const 
[readability-make-member-function-const]
   
   be/src/vec/columns/column_object.h:326:
   ```diff
   -     void finalize(bool ignore_sparse);
   +     void finalize(bool ignore_sparse) const;
   ```
   
   ```suggestion
   void ColumnObject::finalize(bool ignore_sparse) const {
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return 
clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { 
return 0; }
+
+    [[noreturn]] Field operator[](size_t) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void get(size_t, Field&) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void insert(const Field&) override {
+        LOG(FATAL) << "Cannot insert element into " << get_name();
+    }
+
+    StringRef get_data_at(size_t) const override { return {}; }
+
+    void insert_data(const char*, size_t) override { ++s; }
+
+    StringRef serialize_value_into_arena(size_t /*n*/, Arena& arena,
+                                         char const*& begin) const override {
+        return {arena.alloc_continue(0, begin), 0};
+    }
+
+    const char* deserialize_and_insert_from_arena(const char* pos) override {
+        ++s;
+        return pos;
+    }
+
+    void insert_from(const IColumn&, size_t) override { ++s; }
+
+    void insert_range_from(const IColumn& /*src*/, size_t /*start*/, size_t 
length) override {
+        s += length;
+    }
+
+    void insert_indices_from(const IColumn& src, const int* indices_begin,
+                             const int* indices_end) override {
+        s += (indices_end - indices_begin);
+    }
+
+    ColumnPtr filter(const Filter& filt, ssize_t /*result_size_hint*/) const 
override {
+        return clone_dummy(count_bytes_in_filter(filt));
+    }
+
+    size_t filter(const Filter& filter) override {
+        const auto result_size = count_bytes_in_filter(filter);
+        s = result_size;
+        return result_size;
+    }
+
+    ColumnPtr permute(const Permutation& perm, size_t limit) const override {
+        if (s != perm.size()) {
+            LOG(FATAL) << "Size of permutation doesn't match size of column.";
+        }
+
+        return clone_dummy(limit ? std::min(s, limit) : s);
+    }
+
+    void get_permutation(bool /*reverse*/, size_t /*limit*/, int 
/*nan_direction_hint*/,
+                         Permutation& res) const override {
+        res.resize(s);
+        for (size_t i = 0; i < s; ++i) res[i] = i;
+    }
+
+    ColumnPtr replicate(const Offsets& offsets) const override {
+        column_match_offsets_size(s, offsets.size());
+
+        return clone_dummy(offsets.back());
+    }
+
+    void replicate(const uint32_t* indexs, size_t target_size, IColumn& 
column) const override {
+        LOG(FATAL) << "Not implemented";
+    }
+
+    MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) 
const override {
+        if (s != selector.size()) {
+            LOG(FATAL) << "Size of selector doesn't match size of column.";
+        }
+
+        std::vector<size_t> counts(num_columns);
+        for (auto idx : selector) ++counts[idx];
+
+        MutableColumns res(num_columns);
+        for (size_t i = 0; i < num_columns; ++i) res[i] = 
clone_resized(counts[i]);
+
+        return res;
+    }
+
+    void append_data_by_selector(MutableColumnPtr& res,
+                                 const IColumn::Selector& selector) const 
override {
+        size_t num_rows = size();
+
+        if (num_rows < selector.size()) {
+            LOG(FATAL) << fmt::format("Size of selector: {}, is larger than size of column:{}",
+                                      selector.size(), num_rows);
+        }
+
+        res->reserve(num_rows);
+
+        for (size_t i = 0; i < selector.size(); ++i) res->insert_from(*this, 
selector[i]);
+    }
+
+    void addSize(size_t delta) { s += delta; }
+
+    bool is_dummy() const override { return true; }
+
+    void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 
0) override {
+        LOG(FATAL) << "should not call the method in column dummy";
+    }
+
+    void replace_column_data_default(size_t self_row = 0) override {
+        LOG(FATAL) << "should not call the method in column dummy";
+    }
+
+    void get_indices_of_non_default_rows(Offsets64&, size_t, size_t) const 
override {

Review Comment:
   warning: all parameters should be named in a function 
[readability-named-parameter]
   
   ```suggestion
       void get_indices_of_non_default_rows(Offsets64& /*unused*/, size_t /*unused*/, size_t /*unused*/) const override {
   ```
   



##########
be/src/vec/columns/column_dummy.h:
##########
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the 
`Field`.
+  * Not a full-fledged column and is used in a special way.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return 
clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { 
return 0; }
+
+    [[noreturn]] Field operator[](size_t) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void get(size_t, Field&) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void insert(const Field&) override {
+        LOG(FATAL) << "Cannot insert element into " << get_name();
+    }
+
+    StringRef get_data_at(size_t) const override { return {}; }
+
+    void insert_data(const char*, size_t) override { ++s; }
+
+    StringRef serialize_value_into_arena(size_t /*n*/, Arena& arena,
+                                         char const*& begin) const override {
+        return {arena.alloc_continue(0, begin), 0};
+    }
+
+    const char* deserialize_and_insert_from_arena(const char* pos) override {
+        ++s;
+        return pos;
+    }
+
+    void insert_from(const IColumn&, size_t) override { ++s; }
+
+    void insert_range_from(const IColumn& /*src*/, size_t /*start*/, size_t 
length) override {
+        s += length;
+    }
+
+    void insert_indices_from(const IColumn& src, const int* indices_begin,
+                             const int* indices_end) override {
+        s += (indices_end - indices_begin);
+    }
+
+    ColumnPtr filter(const Filter& filt, ssize_t /*result_size_hint*/) const 
override {
+        return clone_dummy(count_bytes_in_filter(filt));
+    }
+
+    size_t filter(const Filter& filter) override {
+        const auto result_size = count_bytes_in_filter(filter);
+        s = result_size;
+        return result_size;
+    }
+
+    ColumnPtr permute(const Permutation& perm, size_t limit) const override {
+        if (s != perm.size()) {
+            LOG(FATAL) << "Size of permutation doesn't match size of column.";
+        }
+
+        return clone_dummy(limit ? std::min(s, limit) : s);
+    }
+
+    void get_permutation(bool /*reverse*/, size_t /*limit*/, int 
/*nan_direction_hint*/,
+                         Permutation& res) const override {
+        res.resize(s);
+        for (size_t i = 0; i < s; ++i) res[i] = i;

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           for (size_t i = 0; i < s; ++i) {
               res[i] = i;
           }
   ```
   



##########
be/src/vec/columns/column_nothing.h:
##########
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/ColumnNothing.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_dummy.h"
+
+namespace doris::vectorized {
+
+class ColumnNothing final : public COWHelper<IColumnDummy, ColumnNothing> {
+private:
+    friend class COWHelper<IColumnDummy, ColumnNothing>;
+
+    ColumnNothing(size_t s_) { s = s_; }
+
+    ColumnNothing(const ColumnNothing&) = default;
+
+public:
+    const char* get_family_name() const override { return "Nothing"; }

Review Comment:
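   warning: method 'get_family_name' can be made static 
[readability-convert-member-functions-to-static]
   
   Note: the suggestion below cannot be applied as written. The method is 
   declared `override`, and a static member function cannot override a 
   virtual one, so this warning should be suppressed rather than fixed.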
   
   ```suggestion
       static const char* get_family_name() override { return "Nothing"; }
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -358,53 +334,121 @@ void ColumnObject::Subcolumn::insert(Field field, 
FieldInfo info) {
 }
 
 void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn& src, size_t 
start, size_t length) {
-    assert(src.is_finalized());
-    const auto& src_column = src.data.back();
-    const auto& src_type = src.least_common_type.get();
+    assert(start + length <= src.size());
+    size_t end = start + length;
+    // num_rows += length;
     if (data.empty()) {
-        add_new_column_part(src.least_common_type.get());
-        data.back()->insert_range_from(*src_column, start, length);
-    } else if (least_common_type.get()->equals(*src_type)) {
-        data.back()->insert_range_from(*src_column, start, length);
-    } else {
-        DataTypePtr new_least_common_type = nullptr;
-        get_least_supertype(DataTypes {least_common_type.get(), src_type}, 
&new_least_common_type,
-                            true /*compatible with string type*/);
-        ColumnPtr casted_column;
-        Status st = schema_util::cast_column({src_column, src_type, ""}, 
new_least_common_type,
-                                             &casted_column);
+        add_new_column_part(src.get_least_common_type());
+    } else if (!least_common_type.get()->equals(*src.get_least_common_type())) 
{
+        DataTypePtr new_least_common_type;
+        get_least_supertype<LeastSupertypeOnError::Jsonb>(
+                DataTypes {least_common_type.get(), 
src.get_least_common_type()},
+                &new_least_common_type);
+        if (!new_least_common_type->equals(*least_common_type.get())) {
+            add_new_column_part(std::move(new_least_common_type));
+        }
+    }
+    if (end <= src.num_of_defaults_in_prefix) {
+        data.back()->insert_many_defaults(length);
+        return;
+    }
+    if (start < src.num_of_defaults_in_prefix) {
+        data.back()->insert_many_defaults(src.num_of_defaults_in_prefix - 
start);
+    }
+    auto insert_from_part = [&](const auto& column, const auto& column_type, 
size_t from,
+                                size_t n) {
+        assert(from + n <= column->size());
+        if (column_type->equals(*least_common_type.get())) {
+            data.back()->insert_range_from(*column, from, n);
+            return;
+        }
+        /// If we need to insert large range, there is no sense to cut part of 
column and cast it.
+        /// Casting of all column and inserting from it can be faster.
+        /// Threshold is just a guess.
+        if (n * 3 >= column->size()) {
+            ColumnPtr casted_column;
+            Status st = schema_util::cast_column({column, column_type, ""}, 
least_common_type.get(),
+                                                 &casted_column);
+            if (!st.ok()) {
+                throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
+                                       st.to_string() + ", real_code:{}", 
st.code());
+            }
+            data.back()->insert_range_from(*casted_column, from, n);
+            return;
+        }
+        auto casted_column = column->cut(from, n);
+        Status st = schema_util::cast_column({casted_column, column_type, ""},
+                                             least_common_type.get(), 
&casted_column);
         if (!st.ok()) {
             throw doris::Exception(ErrorCode::INVALID_ARGUMENT, st.to_string() 
+ ", real_code:{}",
                                    st.code());
         }
-        if (!least_common_type.get()->equals(*new_least_common_type)) {
-            add_new_column_part(std::move(new_least_common_type));
-        }
-        data.back()->insert_range_from(*casted_column, start, length);
+        data.back()->insert_range_from(*casted_column, 0, n);
+    };
+    size_t pos = 0;
+    size_t processed_rows = src.num_of_defaults_in_prefix;
+    /// Find the first part of the column that intersects the range.
+    while (pos < src.data.size() && processed_rows + src.data[pos]->size() < 
start) {
+        processed_rows += src.data[pos]->size();
+        ++pos;
+    }
+    /// Insert from the first part of column.
+    if (pos < src.data.size() && processed_rows < start) {
+        size_t part_start = start - processed_rows;
+        size_t part_length = std::min(src.data[pos]->size() - part_start, end 
- start);
+        insert_from_part(src.data[pos], src.data_types[pos], part_start, 
part_length);
+        processed_rows += src.data[pos]->size();
+        ++pos;
+    }
+    /// Insert from the parts of column in the middle of range.
+    while (pos < src.data.size() && processed_rows + src.data[pos]->size() < 
end) {
+        insert_from_part(src.data[pos], src.data_types[pos], 0, 
src.data[pos]->size());
+        processed_rows += src.data[pos]->size();
+        ++pos;
+    }
+    /// Insert from the last part of column if needed.
+    if (pos < src.data.size() && processed_rows < end) {
+        size_t part_end = end - processed_rows;
+        insert_from_part(src.data[pos], src.data_types[pos], 0, part_end);
     }
 }
 
 bool ColumnObject::Subcolumn::is_finalized() const {
-    return data.empty() || (data.size() == 1 && num_of_defaults_in_prefix == 
0);
+    return num_of_defaults_in_prefix == 0 && (data.empty() || (data.size() == 
1));
 }
 
 template <typename Func>
-ColumnPtr ColumnObject::apply_for_subcolumns(Func&& func, std::string_view 
func_name) const {
+MutableColumnPtr ColumnObject::apply_for_subcolumns(Func&& func) const {
     if (!is_finalized()) {
-        // LOG(FATAL) << "Cannot " << func_name << " non-finalized 
ColumnObject";
-        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
-                               "Cannot {} non-finalized ColumnObject", 
func_name);
+        auto finalized = clone_finalized();
+        auto& finalized_object = assert_cast<ColumnObject&>(*finalized);
+        return finalized_object.apply_for_subcolumns(std::forward<Func>(func));
     }
-    auto res = ColumnObject::create(is_nullable);
+    auto res = ColumnObject::create(is_nullable, false);
     for (const auto& subcolumn : subcolumns) {
         auto new_subcolumn = func(subcolumn->data.get_finalized_column());
-        res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable());
+        res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable(),
+                            subcolumn->data.get_least_common_type());
     }
     return res;
 }
 ColumnPtr ColumnObject::index(const IColumn& indexes, size_t limit) const {
     return apply_for_subcolumns(
-            [&](const auto& subcolumn) { return subcolumn.index(indexes, 
limit); }, "index");
+            [&](const auto& subcolumn) { return subcolumn.index(indexes, 
limit); });
+}
+
+bool ColumnObject::Subcolumn::check_if_sparse_column(size_t num_rows) {

Review Comment:
   warning: method 'check_if_sparse_column' can be made static 
[readability-convert-member-functions-to-static]
   
   be/src/vec/columns/column_object.h:143:
   ```diff
   -         bool check_if_sparse_column(size_t num_rows);
   +         static bool check_if_sparse_column(size_t num_rows);
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to