This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 4ffd4a6d263 [Fix](SchemaChange) refactor variant root column iterator to make row read correct (#41942) 4ffd4a6d263 is described below commit 4ffd4a6d263c4d7b3e8e0dd99d5f8e6b601961e2 Author: lihangyu <15605149...@163.com> AuthorDate: Thu Oct 17 09:41:17 2024 +0800 [Fix](SchemaChange) refactor variant root column iterator to make row read correct (#41942) pick (#41700) --- be/src/olap/rowset/segment_v2/column_reader.cpp | 85 ++++++++++++------------- be/src/olap/rowset/segment_v2/column_reader.h | 3 + be/src/vec/columns/column_object.cpp | 6 ++ be/src/vec/columns/column_object.h | 2 + 4 files changed, 52 insertions(+), 44 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index e970250a973..e2e6e93f602 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -1581,28 +1581,15 @@ void DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP } } -Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, - bool* has_null) { - size_t size = dst->size(); +Status VariantRootColumnIterator::_process_root_column( + vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr& root_column, + const vectorized::DataTypePtr& most_common_type) { auto& obj = dst->is_nullable() ? 
assert_cast<vectorized::ColumnObject&>( assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column()) : assert_cast<vectorized::ColumnObject&>(*dst); - if (obj.is_null_root()) { - obj.create_root(); - } - if (!obj.is_finalized()) { - obj.finalize(); - } - auto root_column = obj.get_root(); - RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null)); - obj.incr_num_rows(*n); - for (auto& entry : obj.get_subcolumns()) { - if (entry->data.size() != size + *n) { - entry->data.insert_many_defaults(*n); - } - } + // fill nullmap if (root_column->is_nullable() && dst->is_nullable()) { vectorized::ColumnUInt8& dst_null_map = @@ -1611,47 +1598,57 @@ Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColum assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column(); dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); } + + // add root column to a tmp object column + auto tmp = vectorized::ColumnObject::create(true, false); + auto& tmp_obj = assert_cast<vectorized::ColumnObject&>(*tmp); + tmp_obj.add_sub_column({}, std::move(root_column), most_common_type); + + // merge tmp object column to dst + obj.insert_range_from(*tmp, 0, tmp->size()); + + // finalize object if needed + if (!obj.is_finalized()) { + obj.finalize(); + } + #ifndef NDEBUG obj.check_consistency(); #endif + return Status::OK(); } +Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, + bool* has_null) { + // read root column + auto& obj = + dst->is_nullable() + ? 
assert_cast<vectorized::ColumnObject&>( + assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column()) + : assert_cast<vectorized::ColumnObject&>(*dst); + + auto most_common_type = obj.get_most_common_type(); + auto root_column = most_common_type->create_column(); + RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null)); + + return _process_root_column(dst, root_column, most_common_type); +} + Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, vectorized::MutableColumnPtr& dst) { - size_t size = dst->size(); + // read root column auto& obj = dst->is_nullable() ? assert_cast<vectorized::ColumnObject&>( assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column()) : assert_cast<vectorized::ColumnObject&>(*dst); - if (obj.is_null_root()) { - obj.create_root(); - } - if (!obj.is_finalized()) { - obj.finalize(); - } - auto root_column = obj.get_root(); + + auto most_common_type = obj.get_most_common_type(); + auto root_column = most_common_type->create_column(); RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column)); - obj.incr_num_rows(count); - for (auto& entry : obj.get_subcolumns()) { - if (entry->data.size() != (size + count)) { - entry->data.insert_many_defaults(count); - } - } - // fill nullmap - if (root_column->is_nullable() && dst->is_nullable()) { - vectorized::ColumnUInt8& dst_null_map = - assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column(); - vectorized::ColumnUInt8& src_null_map = - assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column(); - DCHECK_EQ(src_null_map.size() - size, count); - dst_null_map.insert_range_from(src_null_map, size, count); - } -#ifndef NDEBUG - obj.check_consistency(); -#endif - return Status::OK(); + + return _process_root_column(dst, root_column, most_common_type); } Status DefaultNestedColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst) { diff --git 
a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 6727ea7dc81..187490d06fb 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -654,6 +654,9 @@ public: ordinal_t get_current_ordinal() const override { return _inner_iter->get_current_ordinal(); } private: + Status _process_root_column(vectorized::MutableColumnPtr& dst, + vectorized::MutableColumnPtr& root_column, + const vectorized::DataTypePtr& most_common_type); std::unique_ptr<FileColumnIterator> _inner_iter; }; diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 134b096d8ba..ef10e3b5f7b 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -1803,6 +1803,12 @@ void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&& colum add_sub_column({}, std::move(column), type); } +DataTypePtr ColumnObject::get_most_common_type() const { + auto type = is_nullable ? make_nullable(std::make_shared<MostCommonType>()) + : std::make_shared<MostCommonType>(); + return type; +} + bool ColumnObject::is_null_root() const { auto* root = subcolumns.get_root(); if (root == nullptr) { diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 96a27e44e92..564082b0ef4 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -294,6 +294,8 @@ public: // create root with type and column if missing void create_root(const DataTypePtr& type, MutableColumnPtr&& column); + DataTypePtr get_most_common_type() const; + // root is null or type nothing bool is_null_root() const; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org