This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new dccfc861ed3 branch-3.1: [Opt](cloud-mow) BE can limit the size of 
`get_delete_bitmap` response roughly #55712 (#56532)
dccfc861ed3 is described below

commit dccfc861ed37a411dae3bcf7e1dd4822ccfc868c
Author: bobhan1 <bao...@selectdb.com>
AuthorDate: Sun Sep 28 15:11:27 2025 +0800

    branch-3.1: [Opt](cloud-mow) BE can limit the size of `get_delete_bitmap` 
response roughly #55712 (#56532)
    
    picked from #55712
---
 be/src/cloud/cloud_meta_mgr.cpp                    | 140 ++++++++----
 be/src/cloud/cloud_meta_mgr.h                      |   5 +
 be/src/cloud/config.cpp                            |   7 +
 be/src/cloud/config.h                              |   3 +
 be/test/cloud/cloud_meta_mgr_test.cpp              | 245 +++++++++++++++++++++
 cloud/src/meta-service/meta_service.cpp            |  17 ++
 gensrc/proto/cloud.proto                           |   5 +
 .../pipeline/cloud_p0/conf/be_custom.conf          |   3 +
 8 files changed, 387 insertions(+), 38 deletions(-)

diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index 5be30dcac79..572f168a462 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -834,6 +834,101 @@ bool 
CloudMetaMgr::sync_tablet_delete_bitmap_by_cache(CloudTablet* tablet, int64
     return true;
 }
 
+Status CloudMetaMgr::_get_delete_bitmap_from_ms(GetDeleteBitmapRequest& req,
+                                                GetDeleteBitmapResponse& res) {
+    VLOG_DEBUG << "send GetDeleteBitmapRequest: " << req.ShortDebugString();
+    TEST_SYNC_POINT_CALLBACK("CloudMetaMgr::_get_delete_bitmap_from_ms", &req, 
&res);
+
+    auto st = retry_rpc("get delete bitmap", req, &res, 
&MetaService_Stub::get_delete_bitmap);
+    if (st.code() == ErrorCode::THRIFT_RPC_ERROR) {
+        return st;
+    }
+
+    if (res.status().code() == MetaServiceCode::TABLET_NOT_FOUND) {
+        return Status::NotFound("failed to get delete bitmap: {}", 
res.status().msg());
+    }
+    // The delete bitmap of stale rowsets will be removed when commit 
compaction job,
+    // then delete bitmap of stale rowsets cannot be obtained. But the rowsets 
obtained
+    // by sync_tablet_rowsets may include these stale rowsets. When this case 
happens, the
+    // error code of ROWSETS_EXPIRED will be returned, we need to retry sync 
rowsets again.
+    //
+    // Be query thread             meta-service          Be compaction thread
+    //      |                            |                         |
+    //      |        get rowset          |                         |
+    //      |--------------------------->|                         |
+    //      |    return get rowset       |                         |
+    //      |<---------------------------|                         |
+    //      |                            |        commit job       |
+    //      |                            |<------------------------|
+    //      |                            |    return commit job    |
+    //      |                            |------------------------>|
+    //      |      get delete bitmap     |                         |
+    //      |--------------------------->|                         |
+    //      |  return get delete bitmap  |                         |
+    //      |<---------------------------|                         |
+    //      |                            |                         |
+    if (res.status().code() == MetaServiceCode::ROWSETS_EXPIRED) {
+        return Status::Error<ErrorCode::ROWSETS_EXPIRED, false>("failed to get 
delete bitmap: {}",
+                                                                
res.status().msg());
+    }
+    if (res.status().code() != MetaServiceCode::OK) {
+        return Status::Error<ErrorCode::INTERNAL_ERROR, false>("failed to get 
delete bitmap: {}",
+                                                               
res.status().msg());
+    }
+    return Status::OK();
+}
+
+Status 
CloudMetaMgr::_get_delete_bitmap_from_ms_by_batch(GetDeleteBitmapRequest& req,
+                                                         
GetDeleteBitmapResponse& res,
+                                                         int64_t 
bytes_threadhold) {
+    std::unordered_set<std::string> finished_rowset_ids {};
+    int count = 0;
+    do {
+        GetDeleteBitmapRequest cur_req;
+        GetDeleteBitmapResponse cur_res;
+
+        cur_req.set_cloud_unique_id(config::cloud_unique_id);
+        cur_req.set_tablet_id(req.tablet_id());
+        cur_req.set_base_compaction_cnt(req.base_compaction_cnt());
+        cur_req.set_cumulative_compaction_cnt(req.cumulative_compaction_cnt());
+        cur_req.set_cumulative_point(req.cumulative_point());
+        *(cur_req.mutable_idx()) = req.idx();
+        if (bytes_threadhold > 0) {
+            cur_req.set_dbm_bytes_threshold(bytes_threadhold);
+        }
+        for (int i = 0; i < req.rowset_ids_size(); i++) {
+            if (!finished_rowset_ids.contains(req.rowset_ids(i))) {
+                cur_req.add_rowset_ids(req.rowset_ids(i));
+                cur_req.add_begin_versions(req.begin_versions(i));
+                cur_req.add_end_versions(req.end_versions(i));
+            }
+        }
+
+        RETURN_IF_ERROR(_get_delete_bitmap_from_ms(cur_req, cur_res));
+        ++count;
+
+        res.mutable_rowset_ids()->MergeFrom(cur_res.rowset_ids());
+        res.mutable_segment_ids()->MergeFrom(cur_res.segment_ids());
+        res.mutable_versions()->MergeFrom(cur_res.versions());
+        
res.mutable_segment_delete_bitmaps()->MergeFrom(cur_res.segment_delete_bitmaps());
+
+        for (const auto& rowset_id : cur_res.returned_rowset_ids()) {
+            finished_rowset_ids.insert(rowset_id);
+        }
+
+        bool has_more = cur_res.has_has_more() && cur_res.has_more();
+        if (!has_more) {
+            break;
+        }
+        LOG_INFO("batch get delete bitmap, progress={}/{}", 
finished_rowset_ids.size(),
+                 req.rowset_ids_size())
+                .tag("tablet_id", req.tablet_id())
+                .tag("cur_returned_rowsets", 
cur_res.returned_rowset_ids_size())
+                .tag("rpc_count", count);
+    } while (finished_rowset_ids.size() < req.rowset_ids_size());
+    return Status::OK();
+}
+
 Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t 
old_max_version,
                                                std::ranges::range auto&& 
rs_metas,
                                                const TabletStatsPB& stats, 
const TabletIndexPB& idx,
@@ -887,46 +982,15 @@ Status 
CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_
         sync_stats->get_remote_delete_bitmap_rowsets_num += 
req.rowset_ids_size();
     }
 
-    VLOG_DEBUG << "send GetDeleteBitmapRequest: " << req.ShortDebugString();
-
     auto start = std::chrono::steady_clock::now();
-    auto st = retry_rpc("get delete bitmap", req, &res, 
&MetaService_Stub::get_delete_bitmap);
-    auto end = std::chrono::steady_clock::now();
-    if (st.code() == ErrorCode::THRIFT_RPC_ERROR) {
-        return st;
+    if (config::enable_batch_get_delete_bitmap) {
+        RETURN_IF_ERROR(_get_delete_bitmap_from_ms_by_batch(
+                req, res, config::get_delete_bitmap_bytes_threshold));
+    } else {
+        RETURN_IF_ERROR(_get_delete_bitmap_from_ms(req, res));
     }
+    auto end = std::chrono::steady_clock::now();
 
-    if (res.status().code() == MetaServiceCode::TABLET_NOT_FOUND) {
-        return Status::NotFound("failed to get delete bitmap: {}", 
res.status().msg());
-    }
-    // The delete bitmap of stale rowsets will be removed when commit 
compaction job,
-    // then delete bitmap of stale rowsets cannot be obtained. But the rowsets 
obtained
-    // by sync_tablet_rowsets may include these stale rowsets. When this case 
happend, the
-    // error code of ROWSETS_EXPIRED will be returned, we need to retry sync 
rowsets again.
-    //
-    // Be query thread             meta-service          Be compaction thread
-    //      |                            |                         |
-    //      |        get rowset          |                         |
-    //      |--------------------------->|                         |
-    //      |    return get rowset       |                         |
-    //      |<---------------------------|                         |
-    //      |                            |        commit job       |
-    //      |                            |<------------------------|
-    //      |                            |    return commit job    |
-    //      |                            |------------------------>|
-    //      |      get delete bitmap     |                         |
-    //      |--------------------------->|                         |
-    //      |  return get delete bitmap  |                         |
-    //      |<---------------------------|                         |
-    //      |                            |                         |
-    if (res.status().code() == MetaServiceCode::ROWSETS_EXPIRED) {
-        return Status::Error<ErrorCode::ROWSETS_EXPIRED, false>("failed to get 
delete bitmap: {}",
-                                                                
res.status().msg());
-    }
-    if (res.status().code() != MetaServiceCode::OK) {
-        return Status::Error<ErrorCode::INTERNAL_ERROR, false>("failed to get 
delete bitmap: {}",
-                                                               
res.status().msg());
-    }
     const auto& rowset_ids = res.rowset_ids();
     const auto& segment_ids = res.segment_ids();
     const auto& vers = res.versions();
@@ -959,7 +1023,7 @@ Status 
CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_
                   << ", delete_bitmaps.size()=" << delete_bitmaps.size() << ", 
latency=" << latency
                   << "us";
     } else {
-        LOG_EVERY_N(INFO, 100) << "finish get_delete_bitmap rpc. 
rowset_ids.size()="
+        LOG_EVERY_N(INFO, 100) << "finish get_delete_bitmap rpcs. 
rowset_ids.size()="
                                << rowset_ids.size()
                                << ", delete_bitmaps.size()=" << 
delete_bitmaps.size()
                                << ", latency=" << latency << "us";
diff --git a/be/src/cloud/cloud_meta_mgr.h b/be/src/cloud/cloud_meta_mgr.h
index bd3acbf8215..6fb5fdd03b1 100644
--- a/be/src/cloud/cloud_meta_mgr.h
+++ b/be/src/cloud/cloud_meta_mgr.h
@@ -171,6 +171,11 @@ private:
                                      const TabletIndexPB& idx, DeleteBitmap* 
delete_bitmap,
                                      bool full_sync = false, SyncRowsetStats* 
sync_stats = nullptr);
 
+    Status _get_delete_bitmap_from_ms(GetDeleteBitmapRequest& req, 
GetDeleteBitmapResponse& res);
+    Status _get_delete_bitmap_from_ms_by_batch(GetDeleteBitmapRequest& req,
+                                               GetDeleteBitmapResponse& res,
+                                               int64_t bytes_threadhold);
+
     void check_table_size_correctness(const RowsetMeta& rs_meta);
     int64_t get_segment_file_size(const RowsetMeta& rs_meta);
     int64_t get_inverted_index_file_szie(const RowsetMeta& rs_meta);
diff --git a/be/src/cloud/config.cpp b/be/src/cloud/config.cpp
index 16d6aa7f782..aae959bce06 100644
--- a/be/src/cloud/config.cpp
+++ b/be/src/cloud/config.cpp
@@ -72,6 +72,13 @@ DEFINE_mInt32(delete_bitmap_lock_expiration_seconds, "10");
 
 DEFINE_mInt32(get_delete_bitmap_lock_max_retry_times, "100");
 
+DEFINE_mBool(enable_batch_get_delete_bitmap, "false");
+// used in get_delete_bitmap rpc
+// The MS will return the current results to BE immediately when the size of 
delete bitmap
+// in memory fetched from fdb reaches this threshold for the first time, and BE 
will make subsequent RPCs
+// to get the remaining rowsets' results.
+DEFINE_mInt64(get_delete_bitmap_bytes_threshold, "524288000"); // 500MB
+
 DEFINE_Bool(enable_cloud_txn_lazy_commit, "false");
 
 DEFINE_mInt32(remove_expired_tablet_txn_info_interval_seconds, "300");
diff --git a/be/src/cloud/config.h b/be/src/cloud/config.h
index a52ac758e67..e45dac886a5 100644
--- a/be/src/cloud/config.h
+++ b/be/src/cloud/config.h
@@ -107,6 +107,9 @@ DECLARE_mInt32(delete_bitmap_lock_expiration_seconds);
 
 DECLARE_mInt32(get_delete_bitmap_lock_max_retry_times);
 
+DECLARE_mBool(enable_batch_get_delete_bitmap);
+DECLARE_mInt64(get_delete_bitmap_bytes_threshold);
+
 // Skip writing empty rowset metadata to meta service
 DECLARE_mBool(skip_writing_empty_rowset_metadata);
 
diff --git a/be/test/cloud/cloud_meta_mgr_test.cpp 
b/be/test/cloud/cloud_meta_mgr_test.cpp
index b925dd6f645..ec266b959f4 100644
--- a/be/test/cloud/cloud_meta_mgr_test.cpp
+++ b/be/test/cloud/cloud_meta_mgr_test.cpp
@@ -25,6 +25,8 @@
 
 #include "cloud/cloud_storage_engine.h"
 #include "cloud/cloud_tablet.h"
+#include "cpp/sync_point.h"
+#include "gen_cpp/cloud.pb.h"
 #include "olap/olap_common.h"
 #include "olap/rowset/rowset_factory.h"
 #include "olap/rowset/rowset_meta.h"
@@ -624,4 +626,247 @@ TEST_F(CloudMetaMgrTest, 
test_fill_version_holes_mixed_holes) {
     }
 }
 
+// Helper class to access private methods for testing
+class CloudMetaMgrTestHelper {
+public:
+    static Status call_get_delete_bitmap_from_ms_by_batch(CloudMetaMgr& 
meta_mgr,
+                                                          
GetDeleteBitmapRequest& req,
+                                                          
GetDeleteBitmapResponse& res,
+                                                          int64_t 
bytes_threshold) {
+        return meta_mgr._get_delete_bitmap_from_ms_by_batch(req, res, 
bytes_threshold);
+    }
+};
+
+TEST_F(CloudMetaMgrTest, test_get_delete_bitmap_from_ms_by_batch) {
+    CloudMetaMgr meta_mgr;
+    auto sp = SyncPoint::get_instance();
+
+    // Test case 1: Single batch (no more data)
+    {
+        sp->clear_all_call_backs();
+        sp->enable_processing();
+
+        GetDeleteBitmapRequest req;
+        req.set_tablet_id(12345);
+        req.set_base_compaction_cnt(1);
+        req.set_cumulative_compaction_cnt(2);
+        req.set_cumulative_point(10);
+        req.add_rowset_ids("rowset_1");
+        req.add_begin_versions(1);
+        req.add_end_versions(1);
+        req.add_rowset_ids("rowset_2");
+        req.add_begin_versions(2);
+        req.add_end_versions(2);
+
+        // Mock the _get_delete_bitmap_from_ms method to return success with 
no more data
+        sp->set_call_back("CloudMetaMgr::_get_delete_bitmap_from_ms", 
[](auto&& args) {
+            auto* res = try_any_cast<GetDeleteBitmapResponse*>(args[1]);
+
+            // Simulate successful response
+            res->mutable_status()->set_code(MetaServiceCode::OK);
+            res->add_rowset_ids("rowset_1");
+            res->add_segment_ids(0);
+            res->add_versions(1);
+            res->add_segment_delete_bitmaps("delete_bitmap_1");
+            res->add_rowset_ids("rowset_2");
+            res->add_segment_ids(0);
+            res->add_versions(2);
+            res->add_segment_delete_bitmaps("delete_bitmap_2");
+            res->add_returned_rowset_ids("rowset_1");
+            res->add_returned_rowset_ids("rowset_2");
+            res->set_has_more(false); // No more data
+        });
+
+        GetDeleteBitmapResponse res;
+        Status status = 
CloudMetaMgrTestHelper::call_get_delete_bitmap_from_ms_by_batch(
+                meta_mgr, req, res, 1024);
+
+        EXPECT_TRUE(status.ok()) << "Status: " << status;
+        EXPECT_EQ(res.rowset_ids_size(), 2);
+        EXPECT_EQ(res.rowset_ids(0), "rowset_1");
+        EXPECT_EQ(res.rowset_ids(1), "rowset_2");
+        EXPECT_EQ(res.segment_delete_bitmaps_size(), 2);
+
+        sp->disable_processing();
+        sp->clear_all_call_backs();
+    }
+
+    // Test case 2: Two batches (has_more = true)
+    {
+        sp->clear_all_call_backs();
+        sp->enable_processing();
+
+        GetDeleteBitmapRequest req;
+        req.set_tablet_id(12345);
+        req.set_base_compaction_cnt(1);
+        req.set_cumulative_compaction_cnt(2);
+        req.set_cumulative_point(10);
+        req.add_rowset_ids("rowset_1");
+        req.add_begin_versions(1);
+        req.add_end_versions(1);
+        req.add_rowset_ids("rowset_2");
+        req.add_begin_versions(2);
+        req.add_end_versions(2);
+        req.add_rowset_ids("rowset_3");
+        req.add_begin_versions(3);
+        req.add_end_versions(3);
+
+        int call_count = 0;
+        // Mock the _get_delete_bitmap_from_ms method to simulate two batches
+        sp->set_call_back("CloudMetaMgr::_get_delete_bitmap_from_ms", 
[&call_count](auto&& args) {
+            auto* res = try_any_cast<GetDeleteBitmapResponse*>(args[1]);
+
+            call_count++;
+            res->mutable_status()->set_code(MetaServiceCode::OK);
+
+            if (call_count == 1) {
+                // First batch: return partial data with has_more=true
+                res->add_rowset_ids("rowset_1");
+                res->add_segment_ids(0);
+                res->add_versions(1);
+                res->add_segment_delete_bitmaps("delete_bitmap_1");
+                res->add_returned_rowset_ids("rowset_1");
+                res->set_has_more(true); // More data available
+            } else if (call_count == 2) {
+                // Second batch: return remaining data with has_more=false
+                res->add_rowset_ids("rowset_2");
+                res->add_segment_ids(0);
+                res->add_versions(2);
+                res->add_segment_delete_bitmaps("delete_bitmap_2");
+                res->add_rowset_ids("rowset_3");
+                res->add_segment_ids(0);
+                res->add_versions(3);
+                res->add_segment_delete_bitmaps("delete_bitmap_3");
+                res->add_returned_rowset_ids("rowset_2");
+                res->add_returned_rowset_ids("rowset_3");
+                res->set_has_more(false); // No more data
+            }
+        });
+
+        GetDeleteBitmapResponse res;
+        Status status = 
CloudMetaMgrTestHelper::call_get_delete_bitmap_from_ms_by_batch(
+                meta_mgr, req, res, 512);
+
+        EXPECT_TRUE(status.ok()) << "Status: " << status;
+        EXPECT_EQ(call_count, 2); // Should have made 2 RPC calls
+        EXPECT_EQ(res.rowset_ids_size(), 3);
+        EXPECT_EQ(res.rowset_ids(0), "rowset_1");
+        EXPECT_EQ(res.rowset_ids(1), "rowset_2");
+        EXPECT_EQ(res.rowset_ids(2), "rowset_3");
+        EXPECT_EQ(res.segment_delete_bitmaps_size(), 3);
+
+        sp->disable_processing();
+        sp->clear_all_call_backs();
+    }
+
+    // Test case 3: Multiple batches (more than 2 batches)
+    {
+        sp->clear_all_call_backs();
+        sp->enable_processing();
+
+        GetDeleteBitmapRequest req;
+        req.set_tablet_id(12345);
+        req.set_base_compaction_cnt(1);
+        req.set_cumulative_compaction_cnt(2);
+        req.set_cumulative_point(10);
+        // Add 5 rowsets to test multiple batches
+        for (int i = 1; i <= 5; ++i) {
+            req.add_rowset_ids("rowset_" + std::to_string(i));
+            req.add_begin_versions(i);
+            req.add_end_versions(i);
+        }
+
+        int call_count = 0;
+        // Mock the _get_delete_bitmap_from_ms method to simulate 3 batches
+        sp->set_call_back("CloudMetaMgr::_get_delete_bitmap_from_ms", 
[&call_count](auto&& args) {
+            auto* res = try_any_cast<GetDeleteBitmapResponse*>(args[1]);
+
+            call_count++;
+            res->mutable_status()->set_code(MetaServiceCode::OK);
+
+            if (call_count == 1) {
+                // First batch: return rowset_1 and rowset_2
+                res->add_rowset_ids("rowset_1");
+                res->add_segment_ids(0);
+                res->add_versions(1);
+                res->add_segment_delete_bitmaps("delete_bitmap_1");
+                res->add_rowset_ids("rowset_2");
+                res->add_segment_ids(0);
+                res->add_versions(2);
+                res->add_segment_delete_bitmaps("delete_bitmap_2");
+                res->add_returned_rowset_ids("rowset_1");
+                res->add_returned_rowset_ids("rowset_2");
+                res->set_has_more(true); // More data available
+            } else if (call_count == 2) {
+                // Second batch: return rowset_3 and rowset_4
+                res->add_rowset_ids("rowset_3");
+                res->add_segment_ids(0);
+                res->add_versions(3);
+                res->add_segment_delete_bitmaps("delete_bitmap_3");
+                res->add_rowset_ids("rowset_4");
+                res->add_segment_ids(0);
+                res->add_versions(4);
+                res->add_segment_delete_bitmaps("delete_bitmap_4");
+                res->add_returned_rowset_ids("rowset_3");
+                res->add_returned_rowset_ids("rowset_4");
+                res->set_has_more(true); // Still more data available
+            } else if (call_count == 3) {
+                // Third batch: return rowset_5
+                res->add_rowset_ids("rowset_5");
+                res->add_segment_ids(0);
+                res->add_versions(5);
+                res->add_segment_delete_bitmaps("delete_bitmap_5");
+                res->add_returned_rowset_ids("rowset_5");
+                res->set_has_more(false); // No more data
+            }
+        });
+
+        GetDeleteBitmapResponse res;
+        Status status = 
CloudMetaMgrTestHelper::call_get_delete_bitmap_from_ms_by_batch(
+                meta_mgr, req, res, 256); // Small threshold to force multiple 
batches
+
+        EXPECT_TRUE(status.ok()) << "Status: " << status;
+        EXPECT_EQ(call_count, 3); // Should have made 3 RPC calls
+        EXPECT_EQ(res.rowset_ids_size(), 5);
+        EXPECT_EQ(res.segment_delete_bitmaps_size(), 5);
+        for (int i = 1; i <= 5; ++i) {
+            EXPECT_EQ(res.rowset_ids(i - 1), "rowset_" + std::to_string(i));
+            EXPECT_EQ(res.segment_delete_bitmaps(i - 1), "delete_bitmap_" + 
std::to_string(i));
+        }
+
+        sp->disable_processing();
+        sp->clear_all_call_backs();
+    }
+
+    // Test case 4: RPC failure
+    {
+        sp->clear_all_call_backs();
+        sp->enable_processing();
+
+        GetDeleteBitmapRequest req;
+        req.set_tablet_id(12345);
+        req.add_rowset_ids("rowset_1");
+        req.add_begin_versions(1);
+        req.add_end_versions(1);
+
+        // Mock the meta service replying with TABLET_NOT_FOUND instead of a successful result
+        sp->set_call_back("CloudMetaMgr::_get_delete_bitmap_from_ms", 
[](auto&& args) {
+            auto* res = try_any_cast<GetDeleteBitmapResponse*>(args[1]);
+            res->mutable_status()->set_code(MetaServiceCode::TABLET_NOT_FOUND);
+            res->mutable_status()->set_msg("Tablet not found");
+        });
+
+        GetDeleteBitmapResponse res;
+        Status status = 
CloudMetaMgrTestHelper::call_get_delete_bitmap_from_ms_by_batch(
+                meta_mgr, req, res, 1024);
+
+        // The method should handle the error from _get_delete_bitmap_from_ms
+        // Since _get_delete_bitmap_from_ms_by_batch calls RETURN_IF_ERROR, it 
should propagate the error
+        EXPECT_FALSE(status.ok());
+
+        sp->disable_processing();
+        sp->clear_all_call_backs();
+    }
+}
+
 } // namespace doris
diff --git a/cloud/src/meta-service/meta_service.cpp 
b/cloud/src/meta-service/meta_service.cpp
index e8d96343d9e..c47f5421bc6 100644
--- a/cloud/src/meta-service/meta_service.cpp
+++ b/cloud/src/meta-service/meta_service.cpp
@@ -3190,6 +3190,9 @@ void 
MetaServiceImpl::get_delete_bitmap(google::protobuf::RpcController* control
     auto& rowset_ids = request->rowset_ids();
     auto& begin_versions = request->begin_versions();
     auto& end_versions = request->end_versions();
+    int64_t dbm_bytes_threshold = request->has_dbm_bytes_threshold()
+                                          ? request->dbm_bytes_threshold()
+                                          : 
std::numeric_limits<int64_t>::max();
     if (rowset_ids.size() != begin_versions.size() || rowset_ids.size() != 
end_versions.size()) {
         code = MetaServiceCode::INVALID_ARGUMENT;
         ss << "rowset and version size not match. "
@@ -3326,6 +3329,20 @@ void 
MetaServiceImpl::get_delete_bitmap(google::protobuf::RpcController* control
                   << ", start version=" << begin_versions[i] << ", end 
version=" << end_versions[i]
                   << ", internal round=" << round << ", delete_bitmap_num=" << 
delete_bitmap_num
                   << ", delete_bitmap_byte=" << delete_bitmap_byte;
+
+        response->add_returned_rowset_ids(rowset_ids[i]);
+        if (delete_bitmap_byte >= dbm_bytes_threshold && i < rowset_ids.size() 
- 1) {
+            response->set_has_more(true);
+            LOG_INFO("stop in advance to get delete bitmap due to reach bytes 
threshold")
+                    .tag("instance_id", instance_id)
+                    .tag("tablet_id", tablet_id)
+                    .tag("req_rowset_num", rowset_ids.size())
+                    .tag("finished_rowset_num", 
response->returned_rowset_ids_size())
+                    .tag("delete_bitmap_num", delete_bitmap_num)
+                    .tag("delete_bitmap_byte", delete_bitmap_byte)
+                    .tag("bytes_threshold", dbm_bytes_threshold);
+            break;
+        }
     }
     LOG(INFO) << "finish get delete bitmap for tablet=" << tablet_id
               << ", delete_bitmap_num=" << delete_bitmap_num
diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto
index 51fbf1e6cbc..561d9b3ffc3 100644
--- a/gensrc/proto/cloud.proto
+++ b/gensrc/proto/cloud.proto
@@ -1586,6 +1586,7 @@ message GetDeleteBitmapRequest {
     optional int64 cumulative_compaction_cnt = 8;
     optional int64 cumulative_point = 9;
     optional string request_ip = 10;
+    optional int64 dbm_bytes_threshold = 11;
 }
 
 message GetDeleteBitmapResponse {
@@ -1596,6 +1597,10 @@ message GetDeleteBitmapResponse {
     // Serialized roaring bitmaps indexed with {rowset_id, segment_id, version}
     repeated bytes segment_delete_bitmaps = 5;
     optional int64 tablet_id = 6;
+    // indicates whether the response contains all requested rowsets' delete 
bitmaps
+    // when `has_more` is `true`, BE should launch RPC again to get the 
results for remaining rowsets
+    optional bool has_more = 100;
+    repeated string returned_rowset_ids = 101;
 }
 
 message RemoveDeleteBitmapRequest {
diff --git a/regression-test/pipeline/cloud_p0/conf/be_custom.conf 
b/regression-test/pipeline/cloud_p0/conf/be_custom.conf
index 23d34651ce5..741a3ee9853 100644
--- a/regression-test/pipeline/cloud_p0/conf/be_custom.conf
+++ b/regression-test/pipeline/cloud_p0/conf/be_custom.conf
@@ -48,3 +48,6 @@ 
sys_log_verbose_modules=query_context,runtime_query_statistics_mgr
 enable_parquet_page_index=true
 enable_fuzzy_mode=true
 enable_prefill_all_dbm_agg_cache_after_compaction=true
+
+enable_batch_get_delete_bitmap=true
+get_delete_bitmap_bytes_threshold=10


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to