cambyzju commented on code in PR #12897: URL: https://github.com/apache/doris/pull/12897#discussion_r983017616
########## be/src/io/cache/file_cache_manager.cpp: ########## @@ -56,88 +80,82 @@ void FileCacheManager::remove_file_cache(const std::string& cache_path) { } } -void FileCacheManager::clean_timeout_caches() { - std::shared_lock<std::shared_mutex> rdlock(_cache_map_lock); - for (std::map<std::string, FileCachePtr>::const_iterator iter = _file_cache_map.cbegin(); - iter != _file_cache_map.cend(); ++iter) { - if (iter->second == nullptr) { - continue; +void FileCacheManager::_add_file_cache_for_gc_by_disk(std::vector<GCContextPerDisk>& contexts, + FileCachePtr file_cache) { + // sort file cache by last match time + if (config::file_cache_max_size_per_disk_gb > 0) { + auto file_size = file_cache->cache_file_size(); + if (file_size <= 0) { + return; + } + for (size_t i = 0; i < contexts.size(); ++i) { + if (contexts[i].try_add_file_cache(file_cache, file_size)) { + break; + } } - iter->second->clean_timeout_cache(); } } +void FileCacheManager::gc_file_caches() { + int64_t gc_conf_size = config::file_cache_max_size_per_disk_gb * 1024 * 1024 * 1024; + std::vector<GCContextPerDisk> contexts; + // init for GC by disk size + if (gc_conf_size > 0) { + std::vector<DataDir*> data_dirs = doris::StorageEngine::instance()->get_stores(); + contexts.resize(data_dirs.size()); + for (size_t i = 0; i < contexts.size(); ++i) { + contexts[i].init(data_dirs[i]->path(), gc_conf_size); + } + } -void FileCacheManager::clean_timeout_file_not_in_mem(const std::string& cache_path) { - time_t now = time(nullptr); + // process unused file caches std::shared_lock<std::shared_mutex> rdlock(_cache_map_lock); - // Deal with caches not in _file_cache_map - if (_file_cache_map.find(cache_path) == _file_cache_map.end()) { - std::vector<Path> cache_file_names; - if (io::global_local_filesystem()->list(cache_path, &cache_file_names).ok()) { - std::map<std::string, bool> cache_names; - std::list<std::string> done_names; - for (Path cache_file_name : cache_file_names) { - std::string filename = cache_file_name.native(); - if (!ends_with(filename, CACHE_DONE_FILE_SUFFIX)) { - cache_names[filename] = true; - continue; - } - done_names.push_back(filename); - std::stringstream done_file_ss; - done_file_ss << cache_path << "/" << filename; - std::string done_file_path = done_file_ss.str(); - time_t m_time; - if (!FileUtils::mtime(done_file_path, &m_time).ok()) { - continue; - } - if (now - m_time < config::file_cache_alive_time_sec) { - continue; - } - std::string cache_file_path = - StringReplace(done_file_path, CACHE_DONE_FILE_SUFFIX, "", true); - LOG(INFO) << "Delete timeout done_cache_path: " << done_file_path - << ", cache_file_path: " << cache_file_path << ", m_time: " << m_time; - if (!io::global_local_filesystem()->delete_file(done_file_path).ok()) { - LOG(ERROR) << "delete_file failed: " << done_file_path; + std::vector<TabletSharedPtr> tablets = + StorageEngine::instance()->tablet_manager()->get_all_tablet(); + for (const auto& tablet : tablets) { + std::vector<Path> seg_file_paths; + if (io::global_local_filesystem()->list(tablet->tablet_path(), &seg_file_paths).ok()) { + for (Path seg_file : seg_file_paths) { + std::string seg_filename = seg_file.native(); + // check if it is a dir name + if (ends_with(seg_filename, ".dat")) { continue; } - if (!io::global_local_filesystem()->delete_file(cache_file_path).ok()) { - LOG(ERROR) << "delete_file failed: " << cache_file_path; + // skip file cache already in memory + std::stringstream ss; + ss << tablet->tablet_path() << "/" << seg_filename; + std::string cache_path = ss.str(); + if (_file_cache_map.find(cache_path) != _file_cache_map.end()) { Review Comment: DummyFileCache will be cleared after each GC loop -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org