This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new bafa8d460ff branch-3.0: [fix](cloud) fix misuse of file_cache_evict_in_advance_batch_bytes #49336 (#49495)
bafa8d460ff is described below
commit bafa8d460ff703d2ca33dc42dbaba794d384463a
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Mar 27 10:12:01 2025 +0800
branch-3.0: [fix](cloud) fix misuse of file_cache_evict_in_advance_batch_bytes #49336 (#49495)
Cherry-picked from #49336
Co-authored-by: zhengyu <[email protected]>
---
be/src/common/config.cpp | 2 +
be/src/common/config.h | 2 +
be/src/io/cache/block_file_cache.cpp | 8 ++-
be/test/io/cache/block_file_cache_test.cpp | 106 ++++++++++++++++++++++++++++-
4 files changed, 112 insertions(+), 6 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 87f30fe2409..2edc888d31e 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1082,6 +1082,7 @@
DEFINE_mInt32(file_cache_enter_need_evict_cache_in_advance_percent, "78");
DEFINE_mInt32(file_cache_exit_need_evict_cache_in_advance_percent, "75");
DEFINE_mInt32(file_cache_evict_in_advance_interval_ms, "1000");
DEFINE_mInt64(file_cache_evict_in_advance_batch_bytes, "31457280"); // 30MB
+DEFINE_mInt64(file_cache_evict_in_advance_recycle_keys_num_threshold, "1000");
DEFINE_mBool(enable_read_cache_file_directly, "false");
DEFINE_mBool(file_cache_enable_evict_from_other_queue_by_size, "true");
@@ -1095,6 +1096,7 @@ DEFINE_mInt64(cache_lock_wait_long_tail_threshold_us, "30000000");
DEFINE_mInt64(cache_lock_held_long_tail_threshold_us, "30000000");
DEFINE_mBool(enable_file_cache_keep_base_compaction_output, "false");
DEFINE_mInt64(file_cache_remove_block_qps_limit, "1000");
+DEFINE_mInt64(file_cache_background_gc_interval_ms, "100");
DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "1800");
DEFINE_mInt32(inverted_index_cache_stale_sweep_time_sec, "600");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 8d15b78dfd4..b25c4849d2e 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1118,6 +1118,7 @@
DECLARE_mInt32(file_cache_enter_need_evict_cache_in_advance_percent);
DECLARE_mInt32(file_cache_exit_need_evict_cache_in_advance_percent);
DECLARE_mInt32(file_cache_evict_in_advance_interval_ms);
DECLARE_mInt64(file_cache_evict_in_advance_batch_bytes);
+DECLARE_mInt64(file_cache_evict_in_advance_recycle_keys_num_threshold);
DECLARE_mBool(enable_read_cache_file_directly);
DECLARE_Bool(file_cache_enable_evict_from_other_queue_by_size);
// If true, evict the ttl cache using LRU when full.
@@ -1136,6 +1137,7 @@ DECLARE_mInt64(cache_lock_held_long_tail_threshold_us);
// enable this option; otherwise, it is recommended to leave it disabled.
DECLARE_mBool(enable_file_cache_keep_base_compaction_output);
DECLARE_mInt64(file_cache_remove_block_qps_limit);
+DECLARE_mInt64(file_cache_background_gc_interval_ms);
// inverted index searcher cache
// cache entry stay time after lookup
DECLARE_mInt32(index_cache_entry_stay_time_after_lookup_s);
diff --git a/be/src/io/cache/block_file_cache.cpp b/be/src/io/cache/block_file_cache.cpp
index b5a985fa1f1..ad3c6e99638 100644
--- a/be/src/io/cache/block_file_cache.cpp
+++ b/be/src/io/cache/block_file_cache.cpp
@@ -1841,10 +1841,10 @@ void BlockFileCache::run_background_ttl_gc() { // TODO(zhengyu): fix!
void BlockFileCache::run_background_gc() {
FileCacheKey key;
- static const size_t interval_ms = 100;
- const size_t batch_limit = config::file_cache_remove_block_qps_limit * interval_ms / 1000;
size_t batch_count = 0;
while (!_close) {
+ size_t interval_ms = config::file_cache_background_gc_interval_ms;
+ size_t batch_limit = config::file_cache_remove_block_qps_limit * interval_ms / 1000;
{
std::unique_lock close_lock(_close_mtx);
_close_cv.wait_for(close_lock, std::chrono::milliseconds(interval_ms));
@@ -1889,7 +1889,9 @@ void BlockFileCache::run_background_evict_in_advance() {
batch = config::file_cache_evict_in_advance_batch_bytes;
// Skip if eviction not needed or too many pending recycles
- if (!_need_evict_cache_in_advance || _recycle_keys.size_approx() >= (batch * 10)) {
+ if (!_need_evict_cache_in_advance ||
+ _recycle_keys.size_approx() >=
+ config::file_cache_evict_in_advance_recycle_keys_num_threshold) {
continue;
}
diff --git a/be/test/io/cache/block_file_cache_test.cpp b/be/test/io/cache/block_file_cache_test.cpp
index 88b9ab8667d..d531f8cc896 100644
--- a/be/test/io/cache/block_file_cache_test.cpp
+++ b/be/test/io/cache/block_file_cache_test.cpp
@@ -5120,7 +5120,6 @@ TEST_F(BlockFileCacheTest, load_cache1) {
int64_t cur_time = UnixSeconds();
int64_t expiration_time = cur_time + 120;
auto key1 = io::BlockFileCache::hash("key1");
-
ASSERT_TRUE(global_local_filesystem()
->rename(cache_base_path + "/" +
key1.to_string().substr(0, 3) + "/" +
key1.to_string() + "_0",
@@ -5159,7 +5158,6 @@ TEST_F(BlockFileCacheTest, load_cache1) {
std::to_string(offset) + "_ttl");
}
}
-
TEST_F(BlockFileCacheTest, load_cache2) {
if (fs::exists(cache_base_path)) {
fs::remove_all(cache_base_path);
@@ -5167,7 +5165,6 @@ TEST_F(BlockFileCacheTest, load_cache2) {
fs::create_directories(cache_base_path);
test_file_cache(FileCacheType::NORMAL);
auto key1 = io::BlockFileCache::hash("key1");
-
ASSERT_TRUE(global_local_filesystem()
->rename(cache_base_path + "/" +
key1.to_string().substr(0, 3) + "/" +
key1.to_string() + "_0/0",
@@ -6983,4 +6980,107 @@ TEST_F(BlockFileCacheTest, test_check_need_evict_cache_in_advance) {
fs::remove_all(cache_base_path);
}
+TEST_F(BlockFileCacheTest, test_evict_cache_in_advance_skip) {
+ // std::string cache_base_path = "./ut_file_cache_dir";
+ if (fs::exists(cache_base_path)) {
+ fs::remove_all(cache_base_path);
+ }
+
+ fs::create_directories(cache_base_path);
+
+ io::FileCacheSettings settings;
+ settings.ttl_queue_size = 5000000;
+ settings.ttl_queue_elements = 50000;
+ settings.query_queue_size = 3000000;
+ settings.query_queue_elements = 30000;
+ settings.index_queue_size = 1000000;
+ settings.index_queue_elements = 10000;
+ settings.disposable_queue_size = 1000000;
+ settings.disposable_queue_elements = 10000;
+ settings.capacity = 10000000;
+ settings.max_file_block_size = 100000;
+ settings.max_query_cache_size = 30;
+
+ // Setup disk usage conditions to trigger need_evict_cache_in_advance
+ int64_t origin_enter = config::file_cache_enter_need_evict_cache_in_advance_percent;
+ int64_t origin_exit = config::file_cache_exit_need_evict_cache_in_advance_percent;
+ int64_t origin_threshold = config::file_cache_evict_in_advance_recycle_keys_num_threshold;
+ config::file_cache_enter_need_evict_cache_in_advance_percent = 70;
+ config::file_cache_exit_need_evict_cache_in_advance_percent = 65;
+ config::file_cache_background_gc_interval_ms = 10000000; // no gc at all
+
+ SyncPoint::get_instance()->set_call_back(
+ "BlockFileCache::disk_used_percentage:1", [](std::vector<std::any>&& values) {
+ auto* percent = try_any_cast<std::pair<int, int>*>(values.back());
+ percent->first = 75; // set high
+ percent->second = 60;
+ });
+ SyncPoint::get_instance()->enable_processing();
+
+ io::BlockFileCache cache(cache_base_path, settings);
+ ASSERT_TRUE(cache.initialize());
+
+ int i = 0;
+ for (; i < 100; i++) {
+ if (cache.get_async_open_success()) {
+ break;
+ }
+ std::this_thread::sleep_for(std::chrono::milliseconds(1));
+ }
+ ASSERT_TRUE(cache.get_async_open_success());
+
+ cache.check_need_evict_cache_in_advance();
+ ASSERT_TRUE(cache._need_evict_cache_in_advance);
+
+ // Set recycle keys threshold and fill with enough keys
+ config::file_cache_evict_in_advance_recycle_keys_num_threshold = 10;
+ for (int i = 0; i < 15; i++) {
+ io::FileCacheKey key;
+ key.hash = io::BlockFileCache::hash("key" + std::to_string(i));
+ key.offset = 0;
+ key.meta = {};
+ cache._recycle_keys.enqueue(key);
+ }
+
+ // Fill cache similar to evict_in_advance test
+ TUniqueId query_id;
+ query_id.hi = 1;
+ query_id.lo = 1;
+ io::CacheContext context;
+ ReadStatistics rstats;
+ context.stats = &rstats;
+ context.cache_type = io::FileCacheType::NORMAL;
+ context.query_id = query_id;
+ auto key1 = io::BlockFileCache::hash("key1");
+
+ // Fill cache to its limit
+ size_t limit = 1000000;
+ int64_t offset = 0;
+ for (; offset < limit; offset += 100000) {
+ auto holder = cache.get_or_set(key1, offset, 100000, context);
+ auto blocks = fromHolder(holder);
+ ASSERT_EQ(blocks.size(), 1);
+ download(blocks[0]);
+ blocks.clear();
+ }
+
+ // Get initial recycle keys size
+ size_t initial_recycle_keys_size = cache._recycle_keys.size_approx();
+
+ // Run background evict in advance
+
+ std::this_thread::sleep_for(
+ std::chrono::milliseconds(config::file_cache_evict_in_advance_interval_ms * 10));
+
+ // Verify no new items were added to recycle keys
+ ASSERT_EQ(cache._recycle_keys.size_approx(), initial_recycle_keys_size);
+
+ SyncPoint::get_instance()->disable_processing();
+ SyncPoint::get_instance()->clear_all_call_backs();
+ fs::remove_all(cache_base_path);
+ config::file_cache_enter_need_evict_cache_in_advance_percent = origin_enter;
+ config::file_cache_exit_need_evict_cache_in_advance_percent = origin_exit;
+ config::file_cache_evict_in_advance_recycle_keys_num_threshold = origin_threshold;
+}
+
} // namespace doris::io
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]