This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 6915d76731d [opt](file-cache) add evict file number per round (#39721) 6915d76731d is described below commit 6915d76731def7ec07310e42db7ff29df8062266 Author: Mingyu Chen <morning...@163.com> AuthorDate: Wed Aug 28 08:49:12 2024 +0800 [opt](file-cache) add evict file number per round (#39721) Previously, when getting block from file cache, it may try to evict lots of blocks to reserve capacity for lru cache. This operation may take long time while hold the lock, causing other operation blocked. This PR add a new BE config `file_cache_max_evict_num_per_round`, default is 1000, so that it will not hold lock for a long time. --- be/src/common/config.cpp | 1 + be/src/common/config.h | 1 + be/src/io/cache/block/block_file_cache.h | 3 ++- be/src/io/cache/block/block_lru_file_cache.cpp | 22 ++++++++++++++++------ be/src/io/cache/block/block_lru_file_cache.h | 4 ++-- 5 files changed, 22 insertions(+), 9 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 0567793c388..9f5dd9c79d3 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1013,6 +1013,7 @@ DEFINE_Validator(file_cache_min_file_segment_size, [](const int64_t config) -> b DEFINE_Bool(clear_file_cache, "false"); DEFINE_Bool(enable_file_cache_query_limit, "false"); DEFINE_mInt32(file_cache_wait_sec_after_fail, "0"); // // zero for no waiting and retrying +DEFINE_mInt32(file_cache_max_evict_num_per_round, "5000"); DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "1800"); DEFINE_mInt32(inverted_index_cache_stale_sweep_time_sec, "600"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 605700170b3..cb38d1754aa 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1057,6 +1057,7 @@ DECLARE_Bool(clear_file_cache); DECLARE_Bool(enable_file_cache_query_limit); // only for debug, will be removed after finding out the root cause DECLARE_mInt32(file_cache_wait_sec_after_fail); // zero for no waiting and retrying +DECLARE_mInt32(file_cache_max_evict_num_per_round); // inverted index searcher cache // cache entry stay time after lookup diff --git a/be/src/io/cache/block/block_file_cache.h b/be/src/io/cache/block/block_file_cache.h index 22f87d7eea6..a8dfd1d4da2 100644 --- a/be/src/io/cache/block/block_file_cache.h +++ b/be/src/io/cache/block/block_file_cache.h @@ -168,7 +168,8 @@ protected: mutable std::mutex _mutex; virtual bool try_reserve(const Key& key, const CacheContext& context, size_t offset, - size_t size, std::lock_guard<std::mutex>& cache_lock) = 0; + size_t size, std::lock_guard<std::mutex>& cache_lock, + bool skip_round_check) = 0; virtual void remove(FileBlockSPtr file_segment, std::lock_guard<std::mutex>& cache_lock, std::lock_guard<std::mutex>& segment_lock) = 0; diff --git a/be/src/io/cache/block/block_lru_file_cache.cpp b/be/src/io/cache/block/block_lru_file_cache.cpp index 302891ad770..020e8d94463 100644 --- a/be/src/io/cache/block/block_lru_file_cache.cpp +++ b/be/src/io/cache/block/block_lru_file_cache.cpp @@ -317,7 +317,7 @@ FileBlocks LRUFileCache::split_range_into_cells(const Key& key, const CacheConte while (current_pos < end_pos_non_included) { current_size = std::min(remaining_size, _max_file_segment_size); remaining_size -= current_size; - state = try_reserve(key, context, current_pos, current_size, cache_lock) + state = try_reserve(key, context, current_pos, current_size, cache_lock, false) ? state : FileBlock::State::SKIP_CACHE; if (UNLIKELY(state == FileBlock::State::SKIP_CACHE)) { @@ -536,16 +536,19 @@ const LRUFileCache::LRUQueue& LRUFileCache::get_queue(CacheType type) const { // a. evict from query queue // b. evict from other queue bool LRUFileCache::try_reserve(const Key& key, const CacheContext& context, size_t offset, - size_t size, std::lock_guard<std::mutex>& cache_lock) { + size_t size, std::lock_guard<std::mutex>& cache_lock, + bool skip_round_check) { auto query_context = _enable_file_cache_query_limit && (context.query_id.hi != 0 || context.query_id.lo != 0) ? get_query_context(context.query_id, cache_lock) : nullptr; if (!query_context) { - return try_reserve_for_lru(key, nullptr, context, offset, size, cache_lock); + return try_reserve_for_lru(key, nullptr, context, offset, size, skip_round_check, + cache_lock); } else if (query_context->get_cache_size(cache_lock) + size <= query_context->get_max_cache_size()) { - return try_reserve_for_lru(key, query_context, context, offset, size, cache_lock); + return try_reserve_for_lru(key, query_context, context, offset, size, skip_round_check, + cache_lock); } int64_t cur_time = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::steady_clock::now().time_since_epoch()) @@ -708,6 +711,7 @@ bool LRUFileCache::try_reserve_from_other_queue(CacheType cur_cache_type, size_t bool LRUFileCache::try_reserve_for_lru(const Key& key, QueryFileCacheContextPtr query_context, const CacheContext& context, size_t offset, size_t size, + bool skip_round_check, std::lock_guard<std::mutex>& cache_lock) { int64_t cur_time = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::steady_clock::now().time_since_epoch()) @@ -729,8 +733,10 @@ bool LRUFileCache::try_reserve_for_lru(const Key& key, QueryFileCacheContextPtr std::vector<FileBlockCell*> to_evict; std::vector<FileBlockCell*> trash; + size_t evict_num = 0; for (const auto& [entry_key, entry_offset, entry_size] : queue) { - if (!is_overflow()) { + if (!is_overflow() || + (!skip_round_check && evict_num > config::file_cache_max_evict_num_per_round)) { break; } auto* cell = get_cell(entry_key, entry_offset, cache_lock); @@ -762,6 +768,7 @@ bool LRUFileCache::try_reserve_for_lru(const Key& key, QueryFileCacheContextPtr removed_size += cell_size; --queue_element_size; + ++evict_num; } } @@ -773,6 +780,9 @@ bool LRUFileCache::try_reserve_for_lru(const Key& key, QueryFileCacheContextPtr } }; + if (evict_num > config::file_cache_max_evict_num_per_round) { + LOG(INFO) << "debug evict from file cache number: " << evict_num; + } std::for_each(trash.begin(), trash.end(), remove_file_block_if); std::for_each(to_evict.begin(), to_evict.end(), remove_file_block_if); @@ -931,7 +941,7 @@ Status LRUFileCache::load_cache_info_into_memory(std::lock_guard<std::mutex>& ca continue; } context.cache_type = cache_type; - if (try_reserve(key, context, offset, size, cache_lock)) { + if (try_reserve(key, context, offset, size, cache_lock, true)) { add_cell(key, context, offset, size, FileBlock::State::DOWNLOADED, cache_lock); queue_entries.emplace_back(key, offset); } else { diff --git a/be/src/io/cache/block/block_lru_file_cache.h b/be/src/io/cache/block/block_lru_file_cache.h index df8945cfc9c..47952b5ed70 100644 --- a/be/src/io/cache/block/block_lru_file_cache.h +++ b/be/src/io/cache/block/block_lru_file_cache.h @@ -147,11 +147,11 @@ private: std::lock_guard<std::mutex>& cache_lock); bool try_reserve(const Key& key, const CacheContext& context, size_t offset, size_t size, - std::lock_guard<std::mutex>& cache_lock) override; + std::lock_guard<std::mutex>& cache_lock, bool skip_round_check) override; bool try_reserve_for_lru(const Key& key, QueryFileCacheContextPtr query_context, const CacheContext& context, size_t offset, size_t size, - std::lock_guard<std::mutex>& cache_lock); + bool skip_round_check, std::lock_guard<std::mutex>& cache_lock); std::vector<CacheType> get_other_cache_type(CacheType cur_cache_type); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org