pengxiangyu commented on code in PR #12897:
URL: https://github.com/apache/doris/pull/12897#discussion_r991766090


##########
be/src/io/cache/file_cache_manager.cpp:
##########
@@ -56,92 +82,101 @@ void FileCacheManager::remove_file_cache(const 
std::string& cache_path) {
     }
 }
 
-void FileCacheManager::clean_timeout_caches() {
-    std::shared_lock<std::shared_mutex> rdlock(_cache_map_lock);
-    for (std::map<std::string, FileCachePtr>::const_iterator iter = 
_file_cache_map.cbegin();
-         iter != _file_cache_map.cend(); ++iter) {
-        if (iter->second == nullptr) {
-            continue;
+void 
FileCacheManager::_add_file_cache_for_gc_by_disk(std::vector<GCContextPerDisk>& 
contexts,
+                                                      FileCachePtr file_cache) 
{
+    // sort file cache by last match time
+    if (config::file_cache_max_size_per_disk > 0) {
+        auto file_size = file_cache->cache_file_size();
+        if (file_size <= 0) {
+            return;
+        }
+        for (size_t i = 0; i < contexts.size(); ++i) {
+            if (contexts[i].try_add_file_cache(file_cache, file_size)) {
+                break;
+            }
         }
-        iter->second->clean_timeout_cache();
     }
 }
 
-void FileCacheManager::clean_timeout_file_not_in_mem(const std::string& 
cache_path) {
-    time_t now = time(nullptr);
-    std::shared_lock<std::shared_mutex> rdlock(_cache_map_lock);
-    // Deal with caches not in _file_cache_map
-    if (_file_cache_map.find(cache_path) == _file_cache_map.end()) {
-        std::vector<Path> cache_file_names;
-        if (io::global_local_filesystem()->list(cache_path, 
&cache_file_names).ok()) {
-            std::map<std::string, bool> cache_names;
-            std::list<std::string> done_names;
-            for (Path cache_file_name : cache_file_names) {
-                std::string filename = cache_file_name.native();
-                if (!ends_with(filename, CACHE_DONE_FILE_SUFFIX)) {
-                    cache_names[filename] = true;
-                    continue;
-                }
-                done_names.push_back(filename);
-                std::stringstream done_file_ss;
-                done_file_ss << cache_path << "/" << filename;
-                std::string done_file_path = done_file_ss.str();
-                time_t m_time;
-                if (!FileUtils::mtime(done_file_path, &m_time).ok()) {
-                    continue;
-                }
-                if (now - m_time < config::file_cache_alive_time_sec) {
-                    continue;
-                }
-                std::string cache_file_path =
-                        StringReplace(done_file_path, CACHE_DONE_FILE_SUFFIX, 
"", true);
-                LOG(INFO) << "Delete timeout done_cache_path: " << 
done_file_path
-                          << ", cache_file_path: " << cache_file_path << ", 
m_time: " << m_time;
-                if 
(!io::global_local_filesystem()->delete_file(done_file_path).ok()) {
-                    LOG(ERROR) << "delete_file failed: " << done_file_path;
-                    continue;
-                }
-                if 
(!io::global_local_filesystem()->delete_file(cache_file_path).ok()) {
-                    LOG(ERROR) << "delete_file failed: " << cache_file_path;
-                    continue;
-                }
-            }
-            // find cache file without done file.
-            for (std::list<std::string>::iterator itr = done_names.begin(); 
itr != done_names.end();
-                 ++itr) {
-                std::string cache_filename = StringReplace(*itr, 
CACHE_DONE_FILE_SUFFIX, "", true);
-                if (cache_names.find(cache_filename) != cache_names.end()) {
-                    cache_names.erase(cache_filename);
-                }
-            }
-            // remove cache file without done file
-            for (std::map<std::string, bool>::iterator itr = 
cache_names.begin();
-                 itr != cache_names.end(); ++itr) {
-                std::stringstream cache_file_ss;
-                cache_file_ss << cache_path << "/" << itr->first;
-                std::string cache_file_path = cache_file_ss.str();
-                time_t m_time;
-                if (!FileUtils::mtime(cache_file_path, &m_time).ok()) {
+void FileCacheManager::_gc_unused_file_caches(std::list<FileCachePtr>& result) 
{
+    std::vector<TabletSharedPtr> tablets =
+            StorageEngine::instance()->tablet_manager()->get_all_tablet();
+    for (const auto& tablet : tablets) {
+        std::vector<Path> seg_file_paths;
+        if (io::global_local_filesystem()->list(tablet->tablet_path(), 
&seg_file_paths).ok()) {
+            for (Path seg_file : seg_file_paths) {
+                std::string seg_filename = seg_file.native();
+                // check if it is a dir name
+                if (ends_with(seg_filename, ".dat")) {
                     continue;
                 }
-                if (now - m_time < config::file_cache_alive_time_sec) {
+                // skip file cache already in memory
+                std::stringstream ss;
+                ss << tablet->tablet_path() << "/" << seg_filename;
+                std::string cache_path = ss.str();
+                if (exist(cache_path)) {

Review Comment:
   An unused file cache still has the cache_path. Maybe this should be: if (!exist(cache_path)) {



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to