This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 9d659073a75 [cherry-pick](branch-3.0) Don't prematurely erase
DeleteRows in reading iceberg table with position delete (#47977) (#48309)
9d659073a75 is described below
commit 9d659073a756c0c1d4d5ffce3967a976f32ced61
Author: Socrates <[email protected]>
AuthorDate: Tue Mar 25 11:40:39 2025 +0800
[cherry-pick](branch-3.0) Don't prematurely erase DeleteRows in reading
iceberg table with position delete (#47977) (#48309)
Issue Number: close #41460
Problem Summary:
When reading an Iceberg table, `DeleteRows` that have already been read
must not be released immediately, because an Iceberg data file is split
into multiple `IcebergSplit`s for execution. These `IcebergSplit`s belong
to the same data file, meaning they share the same `DeleteRows`. If the
entry for a data file is erased from the cached `DeleteFile` as soon as
one split has read it, the remaining splits of that data file can no
longer find their `DeleteRows` in the cache. Therefore, `DeleteRows` in
the `DeleteFile` should not be released prematurely. Instead, they should
be released when the shared_kv is reset, at which point all `DeleteRows`
will be freed along with the cached `DeleteFile`.
---
be/src/vec/exec/format/table/iceberg_reader.cpp | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/be/src/vec/exec/format/table/iceberg_reader.cpp
b/be/src/vec/exec/format/table/iceberg_reader.cpp
index 21a98f79acb..e4f8145a2fe 100644
--- a/be/src/vec/exec/format/table/iceberg_reader.cpp
+++ b/be/src/vec/exec/format/table/iceberg_reader.cpp
@@ -302,8 +302,7 @@ Status IcebergTableReader::_position_delete_base(
const std::string data_file_path, const
std::vector<TIcebergDeleteFileDesc>& delete_files) {
std::vector<DeleteRows*> delete_rows_array;
int64_t num_delete_rows = 0;
- std::vector<DeleteFile*> erase_data;
- for (auto& delete_file : delete_files) {
+ for (const auto& delete_file : delete_files) {
SCOPED_TIMER(_iceberg_profile.delete_files_read_time);
Status create_status = Status::OK();
auto* delete_file_cache = _kv_cache->get<DeleteFile>(
@@ -337,7 +336,6 @@ Status IcebergTableReader::_position_delete_base(
if (row_ids->size() > 0) {
delete_rows_array.emplace_back(row_ids);
num_delete_rows += row_ids->size();
- erase_data.emplace_back(delete_file_cache);
}
};
delete_file_map.if_contains(data_file_path, get_value);
@@ -348,10 +346,6 @@ Status IcebergTableReader::_position_delete_base(
this->set_delete_rows();
COUNTER_UPDATE(_iceberg_profile.num_delete_rows, num_delete_rows);
}
- // the deleted rows are copy out, we can erase them.
- for (auto& erase_item : erase_data) {
- erase_item->erase(data_file_path);
- }
return Status::OK();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]