This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 6915d76731d [opt](file-cache) add evict file number per round (#39721)
6915d76731d is described below

commit 6915d76731def7ec07310e42db7ff29df8062266
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Wed Aug 28 08:49:12 2024 +0800

    [opt](file-cache) add evict file number per round (#39721)
    
    Previously, when getting block from file cache, it may try to evict
    lots of blocks to reserve capacity for lru cache. This operation may
    take long time
    while hold the lock, causing other operation blocked.
    
    This PR add a new BE config `file_cache_max_evict_num_per_round`,
    default is 1000, so that it will not hold lock for a long time.
---
 be/src/common/config.cpp                       |  1 +
 be/src/common/config.h                         |  1 +
 be/src/io/cache/block/block_file_cache.h       |  3 ++-
 be/src/io/cache/block/block_lru_file_cache.cpp | 22 ++++++++++++++++------
 be/src/io/cache/block/block_lru_file_cache.h   |  4 ++--
 5 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 0567793c388..9f5dd9c79d3 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1013,6 +1013,7 @@ DEFINE_Validator(file_cache_min_file_segment_size, 
[](const int64_t config) -> b
 DEFINE_Bool(clear_file_cache, "false");
 DEFINE_Bool(enable_file_cache_query_limit, "false");
 DEFINE_mInt32(file_cache_wait_sec_after_fail, "0"); // // zero for no waiting 
and retrying
+DEFINE_mInt32(file_cache_max_evict_num_per_round, "5000");
 
 DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "1800");
 DEFINE_mInt32(inverted_index_cache_stale_sweep_time_sec, "600");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 605700170b3..cb38d1754aa 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1057,6 +1057,7 @@ DECLARE_Bool(clear_file_cache);
 DECLARE_Bool(enable_file_cache_query_limit);
 // only for debug, will be removed after finding out the root cause
 DECLARE_mInt32(file_cache_wait_sec_after_fail); // zero for no waiting and 
retrying
+DECLARE_mInt32(file_cache_max_evict_num_per_round);
 
 // inverted index searcher cache
 // cache entry stay time after lookup
diff --git a/be/src/io/cache/block/block_file_cache.h 
b/be/src/io/cache/block/block_file_cache.h
index 22f87d7eea6..a8dfd1d4da2 100644
--- a/be/src/io/cache/block/block_file_cache.h
+++ b/be/src/io/cache/block/block_file_cache.h
@@ -168,7 +168,8 @@ protected:
     mutable std::mutex _mutex;
 
     virtual bool try_reserve(const Key& key, const CacheContext& context, 
size_t offset,
-                             size_t size, std::lock_guard<std::mutex>& 
cache_lock) = 0;
+                             size_t size, std::lock_guard<std::mutex>& 
cache_lock,
+                             bool skip_round_check) = 0;
 
     virtual void remove(FileBlockSPtr file_segment, 
std::lock_guard<std::mutex>& cache_lock,
                         std::lock_guard<std::mutex>& segment_lock) = 0;
diff --git a/be/src/io/cache/block/block_lru_file_cache.cpp 
b/be/src/io/cache/block/block_lru_file_cache.cpp
index 302891ad770..020e8d94463 100644
--- a/be/src/io/cache/block/block_lru_file_cache.cpp
+++ b/be/src/io/cache/block/block_lru_file_cache.cpp
@@ -317,7 +317,7 @@ FileBlocks LRUFileCache::split_range_into_cells(const Key& 
key, const CacheConte
     while (current_pos < end_pos_non_included) {
         current_size = std::min(remaining_size, _max_file_segment_size);
         remaining_size -= current_size;
-        state = try_reserve(key, context, current_pos, current_size, 
cache_lock)
+        state = try_reserve(key, context, current_pos, current_size, 
cache_lock, false)
                         ? state
                         : FileBlock::State::SKIP_CACHE;
         if (UNLIKELY(state == FileBlock::State::SKIP_CACHE)) {
@@ -536,16 +536,19 @@ const LRUFileCache::LRUQueue& 
LRUFileCache::get_queue(CacheType type) const {
 //     a. evict from query queue
 //     b. evict from other queue
 bool LRUFileCache::try_reserve(const Key& key, const CacheContext& context, 
size_t offset,
-                               size_t size, std::lock_guard<std::mutex>& 
cache_lock) {
+                               size_t size, std::lock_guard<std::mutex>& 
cache_lock,
+                               bool skip_round_check) {
     auto query_context =
             _enable_file_cache_query_limit && (context.query_id.hi != 0 || 
context.query_id.lo != 0)
                     ? get_query_context(context.query_id, cache_lock)
                     : nullptr;
     if (!query_context) {
-        return try_reserve_for_lru(key, nullptr, context, offset, size, 
cache_lock);
+        return try_reserve_for_lru(key, nullptr, context, offset, size, 
skip_round_check,
+                                   cache_lock);
     } else if (query_context->get_cache_size(cache_lock) + size <=
                query_context->get_max_cache_size()) {
-        return try_reserve_for_lru(key, query_context, context, offset, size, 
cache_lock);
+        return try_reserve_for_lru(key, query_context, context, offset, size, 
skip_round_check,
+                                   cache_lock);
     }
     int64_t cur_time = std::chrono::duration_cast<std::chrono::seconds>(
                                
std::chrono::steady_clock::now().time_since_epoch())
@@ -708,6 +711,7 @@ bool LRUFileCache::try_reserve_from_other_queue(CacheType 
cur_cache_type, size_t
 
 bool LRUFileCache::try_reserve_for_lru(const Key& key, 
QueryFileCacheContextPtr query_context,
                                        const CacheContext& context, size_t 
offset, size_t size,
+                                       bool skip_round_check,
                                        std::lock_guard<std::mutex>& 
cache_lock) {
     int64_t cur_time = std::chrono::duration_cast<std::chrono::seconds>(
                                
std::chrono::steady_clock::now().time_since_epoch())
@@ -729,8 +733,10 @@ bool LRUFileCache::try_reserve_for_lru(const Key& key, 
QueryFileCacheContextPtr
 
         std::vector<FileBlockCell*> to_evict;
         std::vector<FileBlockCell*> trash;
+        size_t evict_num = 0;
         for (const auto& [entry_key, entry_offset, entry_size] : queue) {
-            if (!is_overflow()) {
+            if (!is_overflow() ||
+                (!skip_round_check && evict_num > 
config::file_cache_max_evict_num_per_round)) {
                 break;
             }
             auto* cell = get_cell(entry_key, entry_offset, cache_lock);
@@ -762,6 +768,7 @@ bool LRUFileCache::try_reserve_for_lru(const Key& key, 
QueryFileCacheContextPtr
 
                 removed_size += cell_size;
                 --queue_element_size;
+                ++evict_num;
             }
         }
 
@@ -773,6 +780,9 @@ bool LRUFileCache::try_reserve_for_lru(const Key& key, 
QueryFileCacheContextPtr
             }
         };
 
+        if (evict_num > config::file_cache_max_evict_num_per_round) {
+            LOG(INFO) << "debug evict from file cache number: " << evict_num;
+        }
         std::for_each(trash.begin(), trash.end(), remove_file_block_if);
         std::for_each(to_evict.begin(), to_evict.end(), remove_file_block_if);
 
@@ -931,7 +941,7 @@ Status 
LRUFileCache::load_cache_info_into_memory(std::lock_guard<std::mutex>& ca
                     continue;
                 }
                 context.cache_type = cache_type;
-                if (try_reserve(key, context, offset, size, cache_lock)) {
+                if (try_reserve(key, context, offset, size, cache_lock, true)) 
{
                     add_cell(key, context, offset, size, 
FileBlock::State::DOWNLOADED, cache_lock);
                     queue_entries.emplace_back(key, offset);
                 } else {
diff --git a/be/src/io/cache/block/block_lru_file_cache.h 
b/be/src/io/cache/block/block_lru_file_cache.h
index df8945cfc9c..47952b5ed70 100644
--- a/be/src/io/cache/block/block_lru_file_cache.h
+++ b/be/src/io/cache/block/block_lru_file_cache.h
@@ -147,11 +147,11 @@ private:
                   std::lock_guard<std::mutex>& cache_lock);
 
     bool try_reserve(const Key& key, const CacheContext& context, size_t 
offset, size_t size,
-                     std::lock_guard<std::mutex>& cache_lock) override;
+                     std::lock_guard<std::mutex>& cache_lock, bool 
skip_round_check) override;
 
     bool try_reserve_for_lru(const Key& key, QueryFileCacheContextPtr 
query_context,
                              const CacheContext& context, size_t offset, 
size_t size,
-                             std::lock_guard<std::mutex>& cache_lock);
+                             bool skip_round_check, 
std::lock_guard<std::mutex>& cache_lock);
 
     std::vector<CacheType> get_other_cache_type(CacheType cur_cache_type);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to