github-actions[bot] commented on code in PR #26749: URL: https://github.com/apache/doris/pull/26749#discussion_r1389079488
########## be/src/olap/rowset/segment_v2/segment.cpp: ########## @@ -341,6 +383,114 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) { _file_reader, &reader)); _column_readers.emplace(column.unique_id(), std::move(reader)); } + + // init by column path + for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) { + auto& column = _tablet_schema->column(ordinal); + auto iter = column_path_to_footer_ordinal.find(column.path_info()); + if (iter == column_path_to_footer_ordinal.end()) { + continue; + } + ColumnReaderOptions opts; + opts.kept_in_memory = _tablet_schema->is_in_memory(); + std::unique_ptr<ColumnReader> reader; + RETURN_IF_ERROR(ColumnReader::create(opts, footer.columns(iter->second), footer.num_rows(), + _file_reader, &reader)); + _sub_column_tree.add( + iter->first, + SubcolumnReader {std::move(reader), + get_data_type_from_column_meta(footer.columns(iter->second))}); + } + return Status::OK(); +} + +static Status new_default_iterator(const TabletColumn& tablet_column, + std::unique_ptr<ColumnIterator>* iter) { + if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) { + return Status::InternalError("invalid nonexistent column without default value."); + } + auto type_info = get_type_info(&tablet_column); + std::unique_ptr<DefaultValueColumnIterator> default_value_iter(new DefaultValueColumnIterator( + tablet_column.has_default_value(), tablet_column.default_value(), + tablet_column.is_nullable(), std::move(type_info), tablet_column.precision(), + tablet_column.frac())); + ColumnIteratorOptions iter_opts; + + RETURN_IF_ERROR(default_value_iter->init(iter_opts)); + *iter = std::move(default_value_iter); + return Status::OK(); +} + +Status Segment::new_iterator_with_path(const TabletColumn& tablet_column, + std::unique_ptr<ColumnIterator>* iter, + StorageReadOptions* opt) { + vectorized::PathInData root_path; + if (tablet_column.path_info().empty()) { + // Missing path info, but need read the whole variant column + root_path = vectorized::PathInData(tablet_column.name_lower_case()); + } else { + root_path = vectorized::PathInData({tablet_column.path_info().get_parts()[0]}); + } + auto root = _sub_column_tree.find_leaf(root_path); Review Comment: warning: method 'new_iterator_with_path' can be made static [readability-convert-member-functions-to-static] ```suggestion static Status Segment::new_iterator_with_path(const TabletColumn& tablet_column, ``` ########## be/src/olap/rowset/segment_v2/segment.cpp: ########## @@ -352,36 +502,53 @@ // but in the old schema column b's cid == 2 // but they are not the same column Status Segment::new_column_iterator(const TabletColumn& tablet_column, - std::unique_ptr<ColumnIterator>* iter) { + std::unique_ptr<ColumnIterator>* iter, + StorageReadOptions* opt) { + // init column iterator by path info + if (!tablet_column.path_info().empty() || tablet_column.is_variant_type()) { + return new_iterator_with_path(tablet_column, iter, opt); + } + // init default iterator if (_column_readers.count(tablet_column.unique_id()) < 1) { - if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) { - return Status::InternalError("invalid nonexistent column without default value."); - } - auto type_info = get_type_info(&tablet_column); - std::unique_ptr<DefaultValueColumnIterator> default_value_iter( - new DefaultValueColumnIterator(tablet_column.has_default_value(), - tablet_column.default_value(), - tablet_column.is_nullable(), std::move(type_info), - tablet_column.precision(), tablet_column.frac())); - ColumnIteratorOptions iter_opts; - - RETURN_IF_ERROR(default_value_iter->init(iter_opts)); - *iter = std::move(default_value_iter); + RETURN_IF_ERROR(new_default_iterator(tablet_column, iter)); return Status::OK(); } + // init iterator by unique id ColumnIterator* it; RETURN_IF_ERROR(_column_readers.at(tablet_column.unique_id())->new_iterator(&it)); iter->reset(it); return Status::OK(); } +Status Segment::new_column_iterator(int32_t unique_id, std::unique_ptr<ColumnIterator>* iter) { + ColumnIterator* it; + RETURN_IF_ERROR(_column_readers.at(unique_id)->new_iterator(&it)); + iter->reset(it); + return Status::OK(); +} + +ColumnReader* Segment::_get_column_reader(const TabletColumn& col) { + // init column iterator by path info + if (!col.path_info().empty() || col.is_variant_type()) { + auto node = _sub_column_tree.find_exact(col.path_info()); Review Comment: warning: method 'new_column_iterator' can be made static [readability-convert-member-functions-to-static] ```suggestion static Status Segment::new_column_iterator(int32_t unique_id, std::unique_ptr<ColumnIterator>* iter) { ``` ########## be/src/vec/exec/scan/new_olap_scan_node.cpp: ########## @@ -416,6 +421,49 @@ std::string NewOlapScanNode::get_name() { return fmt::format("VNewOlapScanNode({0})", _olap_scan_node.table_name); } +void NewOlapScanNode::_filter_and_collect_cast_type_for_variant( Review Comment: warning: method '_filter_and_collect_cast_type_for_variant' can be made static [readability-convert-member-functions-to-static] ```suggestion static void NewOlapScanNode::_filter_and_collect_cast_type_for_variant( ``` ########## be/src/olap/rowset/segment_v2/segment.cpp: ########## @@ -483,5 +649,25 @@ return Status::OK(); } +bool Segment::same_with_storage_type(int32_t cid, const Schema& schema, + bool ignore_children) const { + auto file_column_type = get_data_type_of(*schema.column(cid), ignore_children); + auto expected_type = Schema::get_data_type_ptr(*schema.column(cid)); + // ignore struct and map now + auto type_without_nullable = vectorized::remove_nullable(expected_type); + if (vectorized::WhichDataType(type_without_nullable).is_struct() || + vectorized::WhichDataType(type_without_nullable).is_map()) { + return true; + } +#ifndef NDEBUG Review Comment: warning: method 'same_with_storage_type' can be made static [readability-convert-member-functions-to-static] be/src/olap/rowset/segment_v2/segment.cpp:662: ```diff - bool ignore_children) const { + bool ignore_children) { ``` be/src/olap/rowset/segment_v2/segment.h:150: ```diff - bool same_with_storage_type(int32_t cid, const Schema& schema, bool ignore_children) const; + static bool same_with_storage_type(int32_t cid, const Schema& schema, bool ignore_children) ; ``` ########## be/src/olap/rowset/segment_v2/segment.cpp: ########## @@ -352,36 +502,53 @@ // but in the old schema column b's cid == 2 // but they are not the same column Status Segment::new_column_iterator(const TabletColumn& tablet_column, - std::unique_ptr<ColumnIterator>* iter) { + std::unique_ptr<ColumnIterator>* iter, + StorageReadOptions* opt) { + // init column iterator by path info + if (!tablet_column.path_info().empty() || tablet_column.is_variant_type()) { + return new_iterator_with_path(tablet_column, iter, opt); + } + // init default iterator if (_column_readers.count(tablet_column.unique_id()) < 1) { - if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) { - return Status::InternalError("invalid nonexistent column without default value."); - } - auto type_info = get_type_info(&tablet_column); - std::unique_ptr<DefaultValueColumnIterator> default_value_iter( - new DefaultValueColumnIterator(tablet_column.has_default_value(), - tablet_column.default_value(), - tablet_column.is_nullable(), std::move(type_info), - tablet_column.precision(), tablet_column.frac())); - ColumnIteratorOptions iter_opts; - - RETURN_IF_ERROR(default_value_iter->init(iter_opts)); - *iter = std::move(default_value_iter); + RETURN_IF_ERROR(new_default_iterator(tablet_column, iter)); return Status::OK(); } + // init iterator by unique id ColumnIterator* it; RETURN_IF_ERROR(_column_readers.at(tablet_column.unique_id())->new_iterator(&it)); iter->reset(it); return Status::OK(); } +Status Segment::new_column_iterator(int32_t unique_id, std::unique_ptr<ColumnIterator>* iter) { + ColumnIterator* it; + RETURN_IF_ERROR(_column_readers.at(unique_id)->new_iterator(&it)); + iter->reset(it); + return Status::OK(); +} + +ColumnReader* Segment::_get_column_reader(const TabletColumn& col) { + // init column iterator by path info + if (!col.path_info().empty() || col.is_variant_type()) { + auto node = _sub_column_tree.find_exact(col.path_info()); + if (node != nullptr) { + return node->data.reader.get(); + } + return nullptr; + } + auto col_unique_id = col.unique_id(); + if (_column_readers.count(col_unique_id) > 0) { Review Comment: warning: method '_get_column_reader' can be made static [readability-convert-member-functions-to-static] be/src/olap/rowset/segment_v2/segment.h:180: ```diff - ColumnReader* _get_column_reader(const TabletColumn& col); + static ColumnReader* _get_column_reader(const TabletColumn& col); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org