(doris) branch branch-3.0 updated: branch-3.0: [fix](cloud) fix misuse of file_cache_evict_in_advance_batch_bytes #49336 (#49495)

dataroaring Wed, 26 Mar 2025 19:12:46 -0700

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new bafa8d460ff branch-3.0: [fix](cloud) fix misuse of file_cache_evict_in_advance_batch_bytes #49336 (#49495)
bafa8d460ff is described below

commit bafa8d460ff703d2ca33dc42dbaba794d384463a
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Mar 27 10:12:01 2025 +0800

    branch-3.0: [fix](cloud) fix misuse of file_cache_evict_in_advance_batch_bytes #49336 (#49495)
    
    Cherry-picked from #49336
    
    Co-authored-by: zhengyu <[email protected]>
---
 be/src/common/config.cpp                   |   2 +
 be/src/common/config.h                     |   2 +
 be/src/io/cache/block_file_cache.cpp       |   8 ++-
 be/test/io/cache/block_file_cache_test.cpp | 106 ++++++++++++++++++++++++++++-
 4 files changed, 112 insertions(+), 6 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 87f30fe2409..2edc888d31e 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1082,6 +1082,7 @@ DEFINE_mInt32(file_cache_enter_need_evict_cache_in_advance_percent, "78");
 DEFINE_mInt32(file_cache_exit_need_evict_cache_in_advance_percent, "75");
 DEFINE_mInt32(file_cache_evict_in_advance_interval_ms, "1000");
 DEFINE_mInt64(file_cache_evict_in_advance_batch_bytes, "31457280"); // 30MB
+DEFINE_mInt64(file_cache_evict_in_advance_recycle_keys_num_threshold, "1000");
 
 DEFINE_mBool(enable_read_cache_file_directly, "false");
 DEFINE_mBool(file_cache_enable_evict_from_other_queue_by_size, "true");
@@ -1095,6 +1096,7 @@ DEFINE_mInt64(cache_lock_wait_long_tail_threshold_us, "30000000");
 DEFINE_mInt64(cache_lock_held_long_tail_threshold_us, "30000000");
 DEFINE_mBool(enable_file_cache_keep_base_compaction_output, "false");
 DEFINE_mInt64(file_cache_remove_block_qps_limit, "1000");
+DEFINE_mInt64(file_cache_background_gc_interval_ms, "100");
 
 DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "1800");
 DEFINE_mInt32(inverted_index_cache_stale_sweep_time_sec, "600");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 8d15b78dfd4..b25c4849d2e 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1118,6 +1118,7 @@ DECLARE_mInt32(file_cache_enter_need_evict_cache_in_advance_percent);
 DECLARE_mInt32(file_cache_exit_need_evict_cache_in_advance_percent);
 DECLARE_mInt32(file_cache_evict_in_advance_interval_ms);
 DECLARE_mInt64(file_cache_evict_in_advance_batch_bytes);
+DECLARE_mInt64(file_cache_evict_in_advance_recycle_keys_num_threshold);
 DECLARE_mBool(enable_read_cache_file_directly);
 DECLARE_Bool(file_cache_enable_evict_from_other_queue_by_size);
 // If true, evict the ttl cache using LRU when full.
@@ -1136,6 +1137,7 @@ DECLARE_mInt64(cache_lock_held_long_tail_threshold_us);
 // enable this option; otherwise, it is recommended to leave it disabled.
 DECLARE_mBool(enable_file_cache_keep_base_compaction_output);
 DECLARE_mInt64(file_cache_remove_block_qps_limit);
+DECLARE_mInt64(file_cache_background_gc_interval_ms);
 // inverted index searcher cache
 // cache entry stay time after lookup
 DECLARE_mInt32(index_cache_entry_stay_time_after_lookup_s);
diff --git a/be/src/io/cache/block_file_cache.cpp b/be/src/io/cache/block_file_cache.cpp
index b5a985fa1f1..ad3c6e99638 100644
--- a/be/src/io/cache/block_file_cache.cpp
+++ b/be/src/io/cache/block_file_cache.cpp
@@ -1841,10 +1841,10 @@ void BlockFileCache::run_background_ttl_gc() { // TODO(zhengyu): fix!
 
 void BlockFileCache::run_background_gc() {
     FileCacheKey key;
-    static const size_t interval_ms = 100;
-    const size_t batch_limit = config::file_cache_remove_block_qps_limit * interval_ms / 1000;
     size_t batch_count = 0;
     while (!_close) {
+        size_t interval_ms = config::file_cache_background_gc_interval_ms;
+        size_t batch_limit = config::file_cache_remove_block_qps_limit * interval_ms / 1000;
         {
             std::unique_lock close_lock(_close_mtx);
             _close_cv.wait_for(close_lock, std::chrono::milliseconds(interval_ms));
@@ -1889,7 +1889,9 @@ void BlockFileCache::run_background_evict_in_advance() {
         batch = config::file_cache_evict_in_advance_batch_bytes;
 
         // Skip if eviction not needed or too many pending recycles
-        if (!_need_evict_cache_in_advance || _recycle_keys.size_approx() >= (batch * 10)) {
+        if (!_need_evict_cache_in_advance ||
+            _recycle_keys.size_approx() >=
+                    config::file_cache_evict_in_advance_recycle_keys_num_threshold) {
             continue;
         }
 
diff --git a/be/test/io/cache/block_file_cache_test.cpp b/be/test/io/cache/block_file_cache_test.cpp
index 88b9ab8667d..d531f8cc896 100644
--- a/be/test/io/cache/block_file_cache_test.cpp
+++ b/be/test/io/cache/block_file_cache_test.cpp
@@ -5120,7 +5120,6 @@ TEST_F(BlockFileCacheTest, load_cache1) {
     int64_t cur_time = UnixSeconds();
     int64_t expiration_time = cur_time + 120;
     auto key1 = io::BlockFileCache::hash("key1");
-
     ASSERT_TRUE(global_local_filesystem()
                         ->rename(cache_base_path + "/" + key1.to_string().substr(0, 3) + "/" +
                                          key1.to_string() + "_0",
@@ -5159,7 +5158,6 @@ TEST_F(BlockFileCacheTest, load_cache1) {
                                     std::to_string(offset) + "_ttl");
     }
 }
-
 TEST_F(BlockFileCacheTest, load_cache2) {
     if (fs::exists(cache_base_path)) {
         fs::remove_all(cache_base_path);
@@ -5167,7 +5165,6 @@ TEST_F(BlockFileCacheTest, load_cache2) {
     fs::create_directories(cache_base_path);
     test_file_cache(FileCacheType::NORMAL);
     auto key1 = io::BlockFileCache::hash("key1");
-
     ASSERT_TRUE(global_local_filesystem()
                         ->rename(cache_base_path + "/" + key1.to_string().substr(0, 3) + "/" +
                                          key1.to_string() + "_0/0",
@@ -6983,4 +6980,107 @@ TEST_F(BlockFileCacheTest, test_check_need_evict_cache_in_advance) {
     fs::remove_all(cache_base_path);
 }
 
+TEST_F(BlockFileCacheTest, test_evict_cache_in_advance_skip) {
+    // std::string cache_base_path = "./ut_file_cache_dir";
+    if (fs::exists(cache_base_path)) {
+        fs::remove_all(cache_base_path);
+    }
+
+    fs::create_directories(cache_base_path);
+
+    io::FileCacheSettings settings;
+    settings.ttl_queue_size = 5000000;
+    settings.ttl_queue_elements = 50000;
+    settings.query_queue_size = 3000000;
+    settings.query_queue_elements = 30000;
+    settings.index_queue_size = 1000000;
+    settings.index_queue_elements = 10000;
+    settings.disposable_queue_size = 1000000;
+    settings.disposable_queue_elements = 10000;
+    settings.capacity = 10000000;
+    settings.max_file_block_size = 100000;
+    settings.max_query_cache_size = 30;
+
+    // Setup disk usage conditions to trigger need_evict_cache_in_advance
+    int64_t origin_enter = config::file_cache_enter_need_evict_cache_in_advance_percent;
+    int64_t origin_exit = config::file_cache_exit_need_evict_cache_in_advance_percent;
+    int64_t origin_threshold = config::file_cache_evict_in_advance_recycle_keys_num_threshold;
+    config::file_cache_enter_need_evict_cache_in_advance_percent = 70;
+    config::file_cache_exit_need_evict_cache_in_advance_percent = 65;
+    config::file_cache_background_gc_interval_ms = 10000000; // no gc at all
+
+    SyncPoint::get_instance()->set_call_back(
+            "BlockFileCache::disk_used_percentage:1", [](std::vector<std::any>&& values) {
+                auto* percent = try_any_cast<std::pair<int, int>*>(values.back());
+                percent->first = 75; // set high
+                percent->second = 60;
+            });
+    SyncPoint::get_instance()->enable_processing();
+
+    io::BlockFileCache cache(cache_base_path, settings);
+    ASSERT_TRUE(cache.initialize());
+
+    int i = 0;
+    for (; i < 100; i++) {
+        if (cache.get_async_open_success()) {
+            break;
+        }
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    }
+    ASSERT_TRUE(cache.get_async_open_success());
+
+    cache.check_need_evict_cache_in_advance();
+    ASSERT_TRUE(cache._need_evict_cache_in_advance);
+
+    // Set recycle keys threshold and fill with enough keys
+    config::file_cache_evict_in_advance_recycle_keys_num_threshold = 10;
+    for (int i = 0; i < 15; i++) {
+        io::FileCacheKey key;
+        key.hash = io::BlockFileCache::hash("key" + std::to_string(i));
+        key.offset = 0;
+        key.meta = {};
+        cache._recycle_keys.enqueue(key);
+    }
+
+    // Fill cache similar to evict_in_advance test
+    TUniqueId query_id;
+    query_id.hi = 1;
+    query_id.lo = 1;
+    io::CacheContext context;
+    ReadStatistics rstats;
+    context.stats = &rstats;
+    context.cache_type = io::FileCacheType::NORMAL;
+    context.query_id = query_id;
+    auto key1 = io::BlockFileCache::hash("key1");
+
+    // Fill cache to its limit
+    size_t limit = 1000000;
+    int64_t offset = 0;
+    for (; offset < limit; offset += 100000) {
+        auto holder = cache.get_or_set(key1, offset, 100000, context);
+        auto blocks = fromHolder(holder);
+        ASSERT_EQ(blocks.size(), 1);
+        download(blocks[0]);
+        blocks.clear();
+    }
+
+    // Get initial recycle keys size
+    size_t initial_recycle_keys_size = cache._recycle_keys.size_approx();
+
+    // Run background evict in advance
+
+    std::this_thread::sleep_for(
+            std::chrono::milliseconds(config::file_cache_evict_in_advance_interval_ms * 10));
+
+    // Verify no new items were added to recycle keys
+    ASSERT_EQ(cache._recycle_keys.size_approx(), initial_recycle_keys_size);
+
+    SyncPoint::get_instance()->disable_processing();
+    SyncPoint::get_instance()->clear_all_call_backs();
+    fs::remove_all(cache_base_path);
+    config::file_cache_enter_need_evict_cache_in_advance_percent = origin_enter;
+    config::file_cache_exit_need_evict_cache_in_advance_percent = origin_exit;
+    config::file_cache_evict_in_advance_recycle_keys_num_threshold = origin_threshold;
+}
+
 } // namespace doris::io


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch branch-3.0 updated: branch-3.0: [fix](cloud) fix misuse of file_cache_evict_in_advance_batch_bytes #49336 (#49495)

Reply via email to