adonis0147 commented on code in PR #12293: URL: https://github.com/apache/doris/pull/12293#discussion_r962113979
########## be/src/olap/rowset/segment_v2/column_reader.h: ########## @@ -394,31 +394,47 @@ class ArrayFileColumnIterator final : public ColumnIterator { if (_array_reader->is_nullable()) { RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord)); } - - RETURN_IF_ERROR(_length_iterator->seek_to_page_start()); - if (_length_iterator->get_current_ordinal() == ord) { - RETURN_IF_ERROR(_item_iterator->seek_to_ordinal( - _length_iterator->get_current_page()->first_array_item_ordinal)); - } else { - ordinal_t start_offset_in_this_page = - _length_iterator->get_current_page()->first_array_item_ordinal; + if (_length_iterator->get_current_page()->next_array_item_ordinal > 0) { + // using offsets info ColumnBlock ordinal_block(_length_batch.get(), nullptr); - ordinal_t size_to_read = ord - _length_iterator->get_current_ordinal(); + ColumnBlockView ordinal_view(&ordinal_block); + + // peek offset + size_t this_read = 1; bool has_null = false; - ordinal_t item_ordinal = start_offset_in_this_page; - while (size_to_read > 0) { - size_t this_read = _length_batch->capacity() < size_to_read - ? _length_batch->capacity() - : size_to_read; - ColumnBlockView ordinal_view(&ordinal_block); - RETURN_IF_ERROR(_length_iterator->next_batch(&this_read, &ordinal_view, &has_null)); - auto* ordinals = reinterpret_cast<uint64_t*>(_length_batch->data()); - for (int i = 0; i < this_read; ++i) { - item_ordinal += ordinals[i]; + RETURN_IF_ERROR(_length_iterator->next_batch(&this_read, &ordinal_view, &has_null)); + RETURN_IF_ERROR(_length_iterator->seek_to_ordinal(ord)); + + auto* ordinals = reinterpret_cast<uint64_t*>(_length_batch->data()); + RETURN_IF_ERROR(_item_iterator->seek_to_ordinal(ordinals[0])); Review Comment: ditto ########## be/src/olap/rowset/segment_v2/column_reader.cpp: ########## @@ -481,13 +481,38 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnP } auto& column_offsets = static_cast<vectorized::ColumnArray::ColumnOffsets&>(*column_offsets_ptr); - auto& offsets_data = column_offsets.get_data(); - for (ssize_t i = start; i < offsets_data.size(); ++i) { - offsets_data[i] += offsets_data[i - 1]; // -1 is ok + size_t num_items = 0; + if (_length_iterator->get_current_page()->next_array_item_ordinal > 0) { + // use offsets info + if (_length_iterator->get_current_page()->has_remaining()) { + // peek read one more to get the last array's length + size_t i = 1; + ordinal_t save = _length_iterator->get_current_ordinal(); + RETURN_IF_ERROR( + _length_iterator->next_batch(&i, column_offsets_ptr, &offsets_has_null)); + RETURN_IF_ERROR(_length_iterator->seek_to_ordinal(save)); + } else { + column_offsets_ptr->insert( + _length_iterator->get_current_page()->next_array_item_ordinal); + } + + auto& offsets_data = column_offsets.get_data(); + num_items = offsets_data.back() - offsets_data[start]; + // caculate real offsets + for (ssize_t i = start; i < offsets_data.size() - 1; ++i) { + offsets_data[i] = offsets_data[i - 1] + (offsets_data[i + 1] - offsets_data[i]); + } + column_offsets.pop_back(1); + } else { + // for compability + auto& offsets_data = column_offsets.get_data(); + for (ssize_t i = start; i < offsets_data.size(); ++i) { + offsets_data[i] += offsets_data[i - 1]; // -1 is ok + } + num_items = offsets_data.back() - offsets_data[start - 1]; Review Comment: This modification makes the function more complex than before, I think you could encapsulate them into a new function. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org