eldenmoon commented on code in PR #48401:
URL: https://github.com/apache/doris/pull/48401#discussion_r1972954440
##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -469,39 +462,143 @@ void
SparseColumnExtractReader::_fill_path_column(vectorized::MutableColumnPtr&
#ifndef NDEBUG
var.check_consistency();
#endif
- _sparse_column->clear();
+ // _sparse_column->clear();
}
-Status SparseColumnExtractReader::next_batch(size_t* n,
vectorized::MutableColumnPtr& dst,
- bool* has_null) {
- _sparse_column->clear();
- RETURN_IF_ERROR(_sparse_column_reader->next_batch(n, _sparse_column,
has_null));
- const auto& offsets = assert_cast<const
vectorized::ColumnMap&>(*_sparse_column).get_offsets();
- // Check if we don't have any paths in shared data in current range.
- if (offsets.back() == offsets[-1]) {
- dst->insert_many_defaults(*n);
- } else {
- _fill_path_column(dst);
+Status SparseColumnMergeReader::seek_to_first() {
+ RETURN_IF_ERROR(_sparse_column_reader->seek_to_first());
+ for (auto& entry : _src_subcolumns_for_sparse) {
+ RETURN_IF_ERROR(entry->data.iterator->seek_to_first());
}
return Status::OK();
}
-Status SparseColumnExtractReader::read_by_rowids(const rowid_t* rowids, const
size_t count,
- vectorized::MutableColumnPtr&
dst) {
- _sparse_column->clear();
- RETURN_IF_ERROR(_sparse_column_reader->read_by_rowids(rowids, count,
_sparse_column));
- const auto& offsets = assert_cast<const
vectorized::ColumnMap&>(*_sparse_column).get_offsets();
- // Check if we don't have any paths in shared data in current range.
- if (offsets.back() == offsets[-1]) {
- dst->insert_many_defaults(count);
- } else {
- _fill_path_column(dst);
+Status SparseColumnMergeReader::seek_to_ordinal(ordinal_t ord) {
+ RETURN_IF_ERROR(_sparse_column_reader->seek_to_ordinal(ord));
+ for (auto& entry : _src_subcolumns_for_sparse) {
+ RETURN_IF_ERROR(entry->data.iterator->seek_to_ordinal(ord));
}
return Status::OK();
}
-ordinal_t SparseColumnExtractReader::get_current_ordinal() const {
- return _sparse_column_reader->get_current_ordinal();
+Status SparseColumnMergeReader::init(const ColumnIteratorOptions& opts) {
+ RETURN_IF_ERROR(_sparse_column_reader->init(opts));
+ for (auto& entry : _src_subcolumns_for_sparse) {
+ entry->data.serde = entry->data.type->get_serde();
+ RETURN_IF_ERROR(entry->data.iterator->init(opts));
+ const auto& path = entry->path.get_path();
+ _sorted_src_subcolumn_for_sparse.emplace_back(StringRef(path.data(),
path.size()), entry);
+ }
+ return Status::OK();
+}
+
+void SparseColumnMergeReader::_serialize_nullable_column_to_sparse(
+ const SubstreamReaderTree::Node* src_subcolumn,
+ vectorized::ColumnString& dst_sparse_column_paths,
+ vectorized::ColumnString& dst_sparse_column_values, const StringRef&
src_path, size_t row) {
+ // every subcolumn is always Nullable
+ const auto& nullable_serde =
+
assert_cast<vectorized::DataTypeNullableSerDe&>(*src_subcolumn->data.serde);
+ const auto& nullable_col =
+ assert_cast<const vectorized::ColumnNullable&,
TypeCheckOnRelease::DISABLE>(
+ *src_subcolumn->data.column);
+ if (nullable_col.is_null_at(row)) {
+ return;
+ }
+ // insert key
+ dst_sparse_column_paths.insert_data(src_path.data, src_path.size);
+ // insert value
+ vectorized::ColumnString::Chars& chars =
dst_sparse_column_values.get_chars();
+
nullable_serde.get_nested_serde()->write_one_cell_to_binary(nullable_col.get_nested_column(),
+ chars, row);
+ dst_sparse_column_values.get_offsets().push_back(chars.size());
+}
+
+void
SparseColumnMergeReader::_process_data_without_sparse_column(vectorized::MutableColumnPtr&
dst,
+ size_t
num_rows) {
+ if (_src_subcolumns_for_sparse.empty()) {
+ dst->insert_many_defaults(num_rows);
+ } else {
+ // merge subcolumns to sparse column
+ // Otherwise insert required src dense columns into sparse column.
+ auto& map_column = assert_cast<vectorized::ColumnMap&>(*dst);
+ auto& sparse_column_keys =
assert_cast<vectorized::ColumnString&>(map_column.get_keys());
+ auto& sparse_column_values =
+
assert_cast<vectorized::ColumnString&>(map_column.get_values());
+ auto& sparse_column_offsets = map_column.get_offsets();
+ for (size_t i = 0; i != num_rows; ++i) {
+ // Paths in sorted_src_subcolumn_for_sparse_column are already
sorted.
+ for (const auto& entry : _src_subcolumns_for_sparse) {
Review Comment:
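sorted — the comment above says the paths are already sorted, but this loop
iterates `_src_subcolumns_for_sparse`. Should it iterate
`_sorted_src_subcolumn_for_sparse` (the container filled in `init`) instead?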
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]