yixiutt commented on code in PR #11195:
URL: https://github.com/apache/doris/pull/11195#discussion_r929819900


##########
be/src/olap/txn_manager.cpp:
##########
@@ -308,8 +309,108 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id,
                 _clear_txn_partition_map_unlocked(transaction_id, partition_id);
             }
         }
+    }
+    auto tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
+#ifdef BE_TEST
+    if (tablet == nullptr) {
+        return Status::OK();
+    }
+#endif
+    // Check if have to build extra delete bitmap for table of UNIQUE_KEY model
+    if (!tablet->enable_unique_key_merge_on_write() ||
+        tablet->tablet_meta()->preferred_rowset_type() != RowsetTypePB::BETA_ROWSET ||
+        rowset_ptr->keys_type() != KeysType::UNIQUE_KEYS) {
         return Status::OK();
     }
+    CHECK(version.first == version.second) << "impossible: " << version;
+
+    // For each key in current set, check if it overwrites any previously
+    // written keys
+    OlapStopWatch watch;
+    std::vector<segment_v2::SegmentSharedPtr> segments;
+    std::vector<segment_v2::SegmentSharedPtr> pre_segments;
+    auto beta_rowset = reinterpret_cast<BetaRowset*>(rowset_ptr.get());
+    Status st = beta_rowset->load_segments(&segments);
+    if (!st.ok()) return st;
+    // lock tablet meta to modify delete bitmap
+    std::lock_guard<std::shared_mutex> meta_wrlock(tablet->get_header_lock());
+    for (auto& seg : segments) {
+        seg->load_index(); // We need index blocks to iterate
+        auto pk_idx = seg->get_primary_key_index();
+        int cnt = 0;
+        int total = pk_idx->num_rows();
+        int32_t remaining = total;
+        bool exact_match = false;
+        std::string last_key;
+        int batch_size = 1024;
+        MemPool pool;
+        while (remaining > 0) {
+            std::unique_ptr<segment_v2::IndexedColumnIterator> iter;
+            RETURN_IF_ERROR(pk_idx->new_iterator(&iter));
+
+            size_t num_to_read = std::min(batch_size, remaining);
+            std::unique_ptr<ColumnVectorBatch> cvb;
+            RETURN_IF_ERROR(ColumnVectorBatch::create(num_to_read, false, pk_idx->type_info(),
+                                                      nullptr, &cvb));
+            ColumnBlock block(cvb.get(), &pool);
+            ColumnBlockView column_block_view(&block);
+            Slice last_key_slice(last_key);
+            RETURN_IF_ERROR(iter->seek_at_or_after(&last_key_slice, &exact_match));
+
+            size_t num_read = num_to_read;
+            RETURN_IF_ERROR(iter->next_batch(&num_read, &column_block_view));
+            DCHECK(num_to_read == num_read);
+            last_key = (reinterpret_cast<const Slice*>(cvb->cell_ptr(num_read - 1)))->to_string();
+
+            // exclude last_key, last_key will be read in next batch.
+            if (num_read == batch_size && num_read != remaining) {
+                num_read -= 1;
+            }
+            for (size_t i = 0; i < num_read; i++) {
+                const Slice* key = reinterpret_cast<const Slice*>(cvb->cell_ptr(i));
+                // first check if exist in pre segment
+                bool find = _check_pk_in_pre_segments(pre_segments, *key, tablet, version);
+                if (find) {
+                    cnt++;

Review Comment:
   fixed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to