Copilot commented on code in PR #61007:
URL: https://github.com/apache/doris/pull/61007#discussion_r2879052338


##########
cloud/src/recycler/recycler.cpp:
##########
@@ -3224,6 +3238,225 @@ int 
InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCl
     return ret;
 }
 
+int InstanceRecycler::decrement_delete_bitmap_packed_file_ref_counts(int64_t 
tablet_id,
+                                                                     const 
std::string& rowset_id,
+                                                                     bool* 
out_is_packed) {
+    if (out_is_packed) {
+        *out_is_packed = false;
+    }
+
+    // Get delete bitmap storage info from FDB
+    std::string dbm_key = versioned::meta_delete_bitmap_key({instance_id_, 
tablet_id, rowset_id});
+    std::unique_ptr<Transaction> txn;
+    TxnErrorCode err = txn_kv_->create_txn(&txn);
+    if (err != TxnErrorCode::TXN_OK) {
+        LOG_WARNING("failed to create txn when getting delete bitmap storage")
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("rowset_id", rowset_id)
+                .tag("err", err);
+        return -1;
+    }
+
+    std::string dbm_val;
+    err = txn->get(dbm_key, &dbm_val);
+    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+        // No delete bitmap for this rowset, nothing to do
+        LOG_INFO("delete bitmap not found, skip packed file ref count 
decrement")
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("rowset_id", rowset_id);
+        return 0;
+    }
+    if (err != TxnErrorCode::TXN_OK) {
+        LOG_WARNING("failed to get delete bitmap storage")
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("rowset_id", rowset_id)
+                .tag("err", err);
+        return -1;
+    }
+
+    DeleteBitmapStoragePB storage;
+    if (!storage.ParseFromString(dbm_val)) {
+        LOG_WARNING("failed to parse delete bitmap storage")
+                .tag("instance_id", instance_id_)
+                .tag("tablet_id", tablet_id)
+                .tag("rowset_id", rowset_id);
+        return -1;
+    }
+
+    // Check if delete bitmap is stored in packed file
+    if (!storage.has_packed_slice_location() ||
+        storage.packed_slice_location().packed_file_path().empty()) {
+        // Not stored in packed file, nothing to do
+        return 0;
+    }
+
+    if (out_is_packed) {
+        *out_is_packed = true;
+    }
+
+    const auto& packed_loc = storage.packed_slice_location();
+    const std::string& packed_file_path = packed_loc.packed_file_path();
+
+    LOG_INFO("decrementing delete bitmap packed file ref count")
+            .tag("instance_id", instance_id_)
+            .tag("tablet_id", tablet_id)
+            .tag("rowset_id", rowset_id)
+            .tag("packed_file_path", packed_file_path);
+
+    const int max_retry_times = std::max(1, 
config::decrement_packed_file_ref_counts_retry_times);
+    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
+        std::unique_ptr<Transaction> update_txn;
+        err = txn_kv_->create_txn(&update_txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG_WARNING("failed to create txn for delete bitmap packed file 
update")
+                    .tag("instance_id", instance_id_)
+                    .tag("tablet_id", tablet_id)
+                    .tag("rowset_id", rowset_id)
+                    .tag("err", err);
+            return -1;
+        }
+
+        std::string packed_key = packed_file_key({instance_id_, 
packed_file_path});
+        std::string packed_val;
+        err = update_txn->get(packed_key, &packed_val);
+        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+            LOG_WARNING("packed file info not found for delete bitmap")
+                    .tag("instance_id", instance_id_)
+                    .tag("tablet_id", tablet_id)
+                    .tag("rowset_id", rowset_id)
+                    .tag("packed_file_path", packed_file_path);
+            return 0;
+        }
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG_WARNING("failed to get packed file info for delete bitmap")
+                    .tag("instance_id", instance_id_)
+                    .tag("tablet_id", tablet_id)
+                    .tag("rowset_id", rowset_id)
+                    .tag("packed_file_path", packed_file_path)
+                    .tag("err", err);
+            return -1;
+        }
+
+        cloud::PackedFileInfoPB packed_info;
+        if (!packed_info.ParseFromString(packed_val)) {
+            LOG_WARNING("failed to parse packed file info for delete bitmap")
+                    .tag("instance_id", instance_id_)
+                    .tag("tablet_id", tablet_id)
+                    .tag("rowset_id", rowset_id)
+                    .tag("packed_file_path", packed_file_path);
+            return -1;
+        }
+
+        // Find and mark the small file entry as deleted
+        // Use tablet_id and rowset_id to match entry instead of path,
+        // because path format may vary with path_version (with or without 
shard prefix)
+        auto* entries = packed_info.mutable_slices();
+        bool found = false;
+        bool already_deleted = false;
+        for (auto& entry : *entries) {
+            if (entry.tablet_id() == tablet_id && entry.rowset_id() == 
rowset_id) {
+                if (!entry.deleted()) {
+                    entry.set_deleted(true);
+                    if (!entry.corrected()) {
+                        entry.set_corrected(true);
+                    }
+                } else {
+                    already_deleted = true;
+                }
+                found = true;
+                break;
+            }
+        }
+

Review Comment:
   `decrement_delete_bitmap_packed_file_ref_counts()` looks up the slice to 
mark deleted using only `tablet_id` + `rowset_id`. A packed file can contain 
multiple slices for the same rowset (all segments + delete-bitmap), and segment 
slices are already marked deleted by `decrement_packed_file_ref_counts()` 
earlier in `delete_rowset_data()`. This can cause this function to match an 
unrelated (already-deleted) segment entry and return early, leaving the 
delete-bitmap slice undeleted and the packed file `ref_cnt` incorrect. Consider 
matching the slice using the persisted `packed_slice_location` (e.g., `offset` 
+ `size`, optionally also `tablet_id/rowset_id`) instead of only 
`tablet_id/rowset_id`.
   ```suggestion
           // Prefer an undeleted slice for this tablet/rowset pair. There may 
be
           // multiple slices (segments + delete-bitmap) for the same rowset;
           // segment slices are expected to be marked deleted earlier, so here
           // we look for a remaining undeleted slice to mark as deleted.
           cloud::PackedFileSlicePB* target_slice = nullptr;
           for (auto& entry : *entries) {
               if (entry.tablet_id() == tablet_id && entry.rowset_id() == 
rowset_id) {
                   found = true;
                   if (!entry.deleted()) {
                       target_slice = &entry;
                       break; // prefer the first undeleted matching slice
                   } else {
                       // Remember that at least one matching slice is already 
deleted
                       already_deleted = true;
                   }
               }
           }
   
           if (target_slice != nullptr) {
               target_slice->set_deleted(true);
               if (!target_slice->corrected()) {
                   target_slice->set_corrected(true);
               }
           }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to