github-actions[bot] commented on code in PR #37190: URL: https://github.com/apache/doris/pull/37190#discussion_r1673288292
##########
be/src/io/cache/block/block_lru_file_cache.cpp:
##########
@@ -460,6 +461,187 @@ size_t LRUFileCache::try_release() {
     return trash.size();
 }
 
+std::pair<size_t, size_t> LRUFileCache::try_merge() {
+    std::unordered_map<Key, std::vector<std::vector<size_t>>, HashCachedFileKey> merged_files;
+    {
+        std::lock_guard<std::mutex> l(_mutex);
+        for (auto& [key, segments] : _files) {
+            std::vector<std::vector<size_t>> merged_blocks = find_continuous_cells(segments);
+            if (!merged_blocks.empty()) {
+                merged_files[key] = merged_blocks;
+            }
+        }
+    }
+    if (!merged_files.empty()) {
+        return merge_continuous_cells(merged_files);
+    }
+    return {0, 0};
+}
+
+Status LRUFileCache::async_merge(const Key& key) {
+    ThreadPool* merge_pool = ExecEnv::GetInstance()->buffered_reader_prefetch_thread_pool();
+    return merge_pool->submit_func([file_key = key, this]() {
+        std::unordered_map<Key, std::vector<std::vector<size_t>>, HashCachedFileKey> merged_files;
+        {
+            std::lock_guard<std::mutex> l(_mutex);
+            if (_files.contains(file_key)) {
+                std::vector<std::vector<size_t>> merged_blocks =
+                        find_continuous_cells(_files[file_key]);
+                if (!merged_blocks.empty()) {
+                    merged_files[file_key] = merged_blocks;
+                }
+            }
+        }
+        if (!merged_files.empty()) {
+            merge_continuous_cells(merged_files);
+        }
+    });
+}
+
+std::vector<std::vector<size_t>> LRUFileCache::find_continuous_cells(
+        const FileBlocksByOffset& segments) {
+    // vector<vector<offset>>, continuous file segments
+    std::vector<std::vector<size_t>> merged_blocks;
+    std::vector<size_t> continuous_blocks;
+    size_t end_offset;
+    size_t merged_size = 0;
+    for (auto& [offset, cell] : segments) {
+        if (cell.file_block->cache_type() != CacheType::NORMAL) {
+            // Only try to merge the normal segments
+            break;
+        }
+        if (cell.releasable()) {
+            // The segment file is not reading currently
+            if (continuous_blocks.empty()) {
+                continuous_blocks.push_back(offset);
+                end_offset = offset + cell.size();
+                merged_size = cell.size();
+            } else if (offset == end_offset) {
+                if (merged_size >= MAX_MERGED_SIZE) {
+                    if (continuous_blocks.size() > 1) {
+                        merged_blocks.push_back(continuous_blocks);
+                    }
+                    continuous_blocks.clear();
+                    merged_size = cell.size();
+                } else {
+                    merged_size += cell.size();
+                }
+                continuous_blocks.push_back(offset);
+                end_offset = offset + cell.size();
+            } else {
+                if (continuous_blocks.size() > 1) {
+                    merged_blocks.push_back(continuous_blocks);
+                }
+                continuous_blocks.clear();
+                continuous_blocks.push_back(offset);
+                end_offset = offset + cell.size();
+                merged_size = cell.size();
+            }
+        }
+    }
+    if (continuous_blocks.size() > 1) {
+        merged_blocks.push_back(continuous_blocks);
+    }
+    return merged_blocks;
+}
+
+std::pair<size_t, size_t> LRUFileCache::merge_continuous_cells(

Review Comment:
   warning: function 'merge_continuous_cells' exceeds recommended size/complexity thresholds [readability-function-size]
   ```cpp
   std::pair<size_t, size_t> LRUFileCache::merge_continuous_cells(
                                           ^
   ```
   <details>
   <summary>Additional context</summary>

   **be/src/io/cache/block/block_lru_file_cache.cpp:547:** 93 lines including whitespace and comments (threshold 80)
   ```cpp
   std::pair<size_t, size_t> LRUFileCache::merge_continuous_cells(
                                           ^
   ```

   </details>

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org