This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 4ffd4a6d263 [Fix](SchemaChange) refactor variant root column iterator 
to make row read correct (#41942)
4ffd4a6d263 is described below

commit 4ffd4a6d263c4d7b3e8e0dd99d5f8e6b601961e2
Author: lihangyu <15605149...@163.com>
AuthorDate: Thu Oct 17 09:41:17 2024 +0800

    [Fix](SchemaChange) refactor variant root column iterator to make row read 
correct (#41942)
    
    pick (#41700)
---
 be/src/olap/rowset/segment_v2/column_reader.cpp | 85 ++++++++++++-------------
 be/src/olap/rowset/segment_v2/column_reader.h   |  3 +
 be/src/vec/columns/column_object.cpp            |  6 ++
 be/src/vec/columns/column_object.h              |  2 +
 4 files changed, 52 insertions(+), 44 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index e970250a973..e2e6e93f602 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -1581,28 +1581,15 @@ void 
DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP
     }
 }
 
-Status VariantRootColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
-                                             bool* has_null) {
-    size_t size = dst->size();
+Status VariantRootColumnIterator::_process_root_column(
+        vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr& 
root_column,
+        const vectorized::DataTypePtr& most_common_type) {
     auto& obj =
             dst->is_nullable()
                     ? assert_cast<vectorized::ColumnObject&>(
                               
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
                     : assert_cast<vectorized::ColumnObject&>(*dst);
-    if (obj.is_null_root()) {
-        obj.create_root();
-    }
-    if (!obj.is_finalized()) {
-        obj.finalize();
-    }
-    auto root_column = obj.get_root();
-    RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
-    obj.incr_num_rows(*n);
-    for (auto& entry : obj.get_subcolumns()) {
-        if (entry->data.size() != size + *n) {
-            entry->data.insert_many_defaults(*n);
-        }
-    }
+
     // fill nullmap
     if (root_column->is_nullable() && dst->is_nullable()) {
         vectorized::ColumnUInt8& dst_null_map =
@@ -1611,47 +1598,57 @@ Status VariantRootColumnIterator::next_batch(size_t* n, 
vectorized::MutableColum
                 
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
         dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size());
     }
+
+    // add root column to a tmp object column
+    auto tmp = vectorized::ColumnObject::create(true, false);
+    auto& tmp_obj = assert_cast<vectorized::ColumnObject&>(*tmp);
+    tmp_obj.add_sub_column({}, std::move(root_column), most_common_type);
+
+    // merge tmp object column to dst
+    obj.insert_range_from(*tmp, 0, tmp->size());
+
+    // finalize object if needed
+    if (!obj.is_finalized()) {
+        obj.finalize();
+    }
+
 #ifndef NDEBUG
     obj.check_consistency();
 #endif
+
     return Status::OK();
 }
 
+Status VariantRootColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
+                                             bool* has_null) {
+    // read root column
+    auto& obj =
+            dst->is_nullable()
+                    ? assert_cast<vectorized::ColumnObject&>(
+                              
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+                    : assert_cast<vectorized::ColumnObject&>(*dst);
+
+    auto most_common_type = obj.get_most_common_type();
+    auto root_column = most_common_type->create_column();
+    RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
+
+    return _process_root_column(dst, root_column, most_common_type);
+}
+
 Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const 
size_t count,
                                                  vectorized::MutableColumnPtr& 
dst) {
-    size_t size = dst->size();
+    // read root column
     auto& obj =
             dst->is_nullable()
                     ? assert_cast<vectorized::ColumnObject&>(
                               
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
                     : assert_cast<vectorized::ColumnObject&>(*dst);
-    if (obj.is_null_root()) {
-        obj.create_root();
-    }
-    if (!obj.is_finalized()) {
-        obj.finalize();
-    }
-    auto root_column = obj.get_root();
+
+    auto most_common_type = obj.get_most_common_type();
+    auto root_column = most_common_type->create_column();
     RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column));
-    obj.incr_num_rows(count);
-    for (auto& entry : obj.get_subcolumns()) {
-        if (entry->data.size() != (size + count)) {
-            entry->data.insert_many_defaults(count);
-        }
-    }
-    // fill nullmap
-    if (root_column->is_nullable() && dst->is_nullable()) {
-        vectorized::ColumnUInt8& dst_null_map =
-                
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
-        vectorized::ColumnUInt8& src_null_map =
-                
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
-        DCHECK_EQ(src_null_map.size() - size, count);
-        dst_null_map.insert_range_from(src_null_map, size, count);
-    }
-#ifndef NDEBUG
-    obj.check_consistency();
-#endif
-    return Status::OK();
+
+    return _process_root_column(dst, root_column, most_common_type);
 }
 
 Status DefaultNestedColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst) {
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h 
b/be/src/olap/rowset/segment_v2/column_reader.h
index 6727ea7dc81..187490d06fb 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -654,6 +654,9 @@ public:
     ordinal_t get_current_ordinal() const override { return 
_inner_iter->get_current_ordinal(); }
 
 private:
+    Status _process_root_column(vectorized::MutableColumnPtr& dst,
+                                vectorized::MutableColumnPtr& root_column,
+                                const vectorized::DataTypePtr& 
most_common_type);
     std::unique_ptr<FileColumnIterator> _inner_iter;
 };
 
diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index 134b096d8ba..ef10e3b5f7b 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1803,6 +1803,12 @@ void ColumnObject::create_root(const DataTypePtr& type, 
MutableColumnPtr&& colum
     add_sub_column({}, std::move(column), type);
 }
 
+DataTypePtr ColumnObject::get_most_common_type() const {
+    auto type = is_nullable ? make_nullable(std::make_shared<MostCommonType>())
+                            : std::make_shared<MostCommonType>();
+    return type;
+}
+
 bool ColumnObject::is_null_root() const {
     auto* root = subcolumns.get_root();
     if (root == nullptr) {
diff --git a/be/src/vec/columns/column_object.h 
b/be/src/vec/columns/column_object.h
index 96a27e44e92..564082b0ef4 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -294,6 +294,8 @@ public:
     // create root with type and column if missing
     void create_root(const DataTypePtr& type, MutableColumnPtr&& column);
 
+    DataTypePtr get_most_common_type() const;
+
     // root is null or type nothing
     bool is_null_root() const;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to