github-actions[bot] commented on code in PR #18652: URL: https://github.com/apache/doris/pull/18652#discussion_r1165483316
########## be/src/io/cache/block/block_lru_file_cache.cpp: ########## @@ -408,235 +475,231 @@ bool LRUFileCache::try_reserve(const Key& key, const TUniqueId& query_id, bool i } } removed_size += cell_size; - --queue_size; } } } - auto remove_file_segment_if = [&](FileBlockCell* cell) { - FileBlockSPtr file_segment = cell->file_segment; - if (file_segment) { - size_t file_segment_size = cell->size(); - query_context->remove(file_segment->key(), file_segment->offset(), - file_segment->is_persistent(), file_segment_size, cache_lock); - - std::lock_guard segment_lock(file_segment->_mutex); - remove(file_segment->key(), file_segment->is_persistent(), file_segment->offset(), - cache_lock, segment_lock); + auto remove_file_block_if = [&](FileBlockCell* cell) { + FileBlockSPtr file_block = cell->file_block; + if (file_block) { + query_context->remove(file_block->key(), file_block->offset(), cache_lock); + std::lock_guard segment_lock(file_block->_mutex); + remove(file_block, cache_lock, segment_lock); } }; for (auto& iter : ghost) { - query_context->remove(iter->key, iter->offset, iter->is_persistent, iter->size, cache_lock); + query_context->remove(iter->key, iter->offset, cache_lock); } - std::for_each(trash.begin(), trash.end(), remove_file_segment_if); - std::for_each(to_evict.begin(), to_evict.end(), remove_file_segment_if); + std::for_each(trash.begin(), trash.end(), remove_file_block_if); + std::for_each(to_evict.begin(), to_evict.end(), remove_file_block_if); - if (is_overflow()) { + if (is_overflow() && + !try_reserve_from_other_queue(context.cache_type, size, cur_time, cache_lock)) { return false; } - - query_context->reserve(key, offset, is_persistent, size, cache_lock); + query_context->reserve(key, offset, size, cache_lock); return true; } -bool LRUFileCache::try_reserve_for_main_list(const Key& key, QueryFileCacheContextPtr query_context, - bool is_persistent, size_t offset, size_t size, - std::lock_guard<std::mutex>& cache_lock) { - LRUQueue* queue = is_persistent ? &_persistent_queue : &_queue; - auto removed_size = 0; - size_t queue_size = queue->get_elements_num(cache_lock); - - size_t max_size = is_persistent ? _persistent_max_size : _max_size; - size_t max_element_size = is_persistent ? _persistent_max_element_size : _max_element_size; - auto is_overflow = [&] { - return (queue->get_total_cache_size(cache_lock) + size - removed_size > max_size) || - queue_size >= max_element_size; - }; +std::vector<CacheType> LRUFileCache::get_other_cache_type(CacheType cur_cache_type) { + switch (cur_cache_type) { + case CacheType::INDEX: + return {CacheType::DISPOSABLE, CacheType::NORMAL}; + case CacheType::NORMAL: + return {CacheType::DISPOSABLE, CacheType::INDEX}; + case CacheType::DISPOSABLE: + return {CacheType::NORMAL, CacheType::INDEX}; + default: + return {}; + } + return {}; +} +bool LRUFileCache::try_reserve_from_other_queue(CacheType cur_cache_type, size_t size, + int64_t cur_time, + std::lock_guard<std::mutex>& cache_lock) { + auto other_cache_types = get_other_cache_type(cur_cache_type); + size_t removed_size = 0; + size_t cur_cache_size = _cur_cache_size; + auto is_overflow = [&] { return cur_cache_size + size - removed_size > _total_size; }; std::vector<FileBlockCell*> to_evict; std::vector<FileBlockCell*> trash; + for (CacheType cache_type : other_cache_types) { + auto& queue = get_queue(cache_type); + for (const auto& [entry_key, entry_offset, entry_size] : queue) { + if (!is_overflow()) { + break; + } + auto* cell = get_cell(entry_key, entry_offset, cache_lock); + DCHECK(cell) << "Cache became inconsistent. Key: " << entry_key.to_string() + << ", offset: " << entry_offset; - for (const auto& [entry_key, entry_offset, entry_size, _] : *queue) { - if (!is_overflow()) { - break; - } - auto* cell = get_cell(entry_key, is_persistent, entry_offset, cache_lock); - - DCHECK(cell) << "Cache became inconsistent. Key: " << key.to_string() - << ", offset: " << offset; - - size_t cell_size = cell->size(); - DCHECK(entry_size == cell_size); + size_t cell_size = cell->size(); + DCHECK(entry_size == cell_size); - /// It is guaranteed that cell is not removed from cache as long as - /// pointer to corresponding file segment is hold by any other thread. + if (cell->atime == 0 ? true : cell->atime + queue.get_hot_data_interval() > cur_time) { + break; + } - if (cell->releasable()) { - auto& file_segment = cell->file_segment; + if (cell->releasable()) { + auto& file_block = cell->file_block; - std::lock_guard segment_lock(file_segment->_mutex); + std::lock_guard segment_lock(file_block->_mutex); - switch (file_segment->_download_state) { - case FileBlock::State::DOWNLOADED: { - /// Cell will actually be removed only if - /// we managed to reserve enough space. + switch (file_block->_download_state) { + case FileBlock::State::DOWNLOADED: { + to_evict.push_back(cell); + break; + } + default: { + trash.push_back(cell); + break; + } + } - to_evict.push_back(cell); - break; - } - default: { - trash.push_back(cell); - break; - } + removed_size += cell_size; } - - removed_size += cell_size; - --queue_size; } } - - auto remove_file_segment_if = [&](FileBlockCell* cell) { - FileBlockSPtr file_segment = cell->file_segment; - if (file_segment) { - std::lock_guard segment_lock(file_segment->_mutex); - remove(file_segment->key(), file_segment->is_persistent(), file_segment->offset(), - cache_lock, segment_lock); + auto remove_file_block_if = [&](FileBlockCell* cell) { + FileBlockSPtr file_block = cell->file_block; + if (file_block) { + std::lock_guard segment_lock(file_block->_mutex); + remove(file_block, cache_lock, segment_lock); } }; - std::for_each(trash.begin(), trash.end(), remove_file_segment_if); - std::for_each(to_evict.begin(), to_evict.end(), remove_file_segment_if); + std::for_each(trash.begin(), trash.end(), remove_file_block_if); + std::for_each(to_evict.begin(), to_evict.end(), remove_file_block_if); if (is_overflow()) { return false; } - if (query_context) { - query_context->reserve(key, offset, is_persistent, size, cache_lock); - } return true; } -void LRUFileCache::remove_if_exists(const Key& key, bool is_persistent) { - std::lock_guard cache_lock(_mutex); +bool LRUFileCache::try_reserve_for_lru(const Key& key, QueryFileCacheContextPtr query_context, + const CacheContext& context, size_t offset, size_t size, + std::lock_guard<std::mutex>& cache_lock) { + int64_t cur_time = std::chrono::duration_cast<std::chrono::seconds>( + std::chrono::steady_clock::now().time_since_epoch()) + .count(); + if (!try_reserve_from_other_queue(context.cache_type, size, cur_time, cache_lock)) { + auto& queue = get_queue(context.cache_type); + size_t removed_size = 0; + size_t queue_element_size = queue.get_elements_num(cache_lock); + size_t queue_size = queue.get_total_cache_size(cache_lock); + size_t cur_cache_size = _cur_cache_size; + + size_t max_size = queue.get_max_size(); + size_t max_element_size = queue.get_max_element_size(); + auto is_overflow = [&] { + return cur_cache_size + size - removed_size > _total_size || + (queue_size + size - removed_size > max_size) || + queue_element_size >= max_element_size; + }; + + std::vector<FileBlockCell*> to_evict; + std::vector<FileBlockCell*> trash; + for (const auto& [entry_key, entry_offset, entry_size] : queue) { + if (!is_overflow()) { + break; + } + auto* cell = get_cell(entry_key, entry_offset, cache_lock); - auto file_key = std::make_pair(key, is_persistent); - auto it = _files.find(file_key); - if (it == _files.end()) { - return; - } + DCHECK(cell) << "Cache became inconsistent. Key: " << entry_key.to_string() + << ", offset: " << entry_offset; - auto& offsets = it->second; + size_t cell_size = cell->size(); + DCHECK(entry_size == cell_size); - std::vector<FileBlockCell*> to_remove; - to_remove.reserve(offsets.size()); + if (cell->releasable()) { + auto& file_block = cell->file_block; - for (auto& [offset, cell] : offsets) { - to_remove.push_back(&cell); - } + std::lock_guard segment_lock(file_block->_mutex); - bool some_cells_were_skipped = false; - for (auto& cell : to_remove) { - /// In ordinary case we remove data from cache when it's not used by anyone. - /// But if we have multiple replicated zero-copy tables on the same server - /// it became possible to start removing something from cache when it is used - /// by other "zero-copy" tables. That is why it's not an error. - if (!cell->releasable()) { - some_cells_were_skipped = true; - continue; - } + switch (file_block->_download_state) { + case FileBlock::State::DOWNLOADED: { + /// Cell will actually be removed only if + /// we managed to reserve enough space. + + to_evict.push_back(cell); + break; + } + default: { + trash.push_back(cell); + break; + } + } - auto file_segment = cell->file_segment; - if (file_segment) { - std::lock_guard<std::mutex> segment_lock(file_segment->_mutex); - remove(file_segment->key(), is_persistent, file_segment->offset(), cache_lock, - segment_lock); + removed_size += cell_size; + --queue_element_size; + } } - } - auto key_path = get_path_in_local_cache(key); + auto remove_file_block_if = [&](FileBlockCell* cell) { + FileBlockSPtr file_block = cell->file_block; + if (file_block) { + std::lock_guard segment_lock(file_block->_mutex); + remove(file_block, cache_lock, segment_lock); + } + }; - if (!some_cells_were_skipped) { - _files.erase(file_key); + std::for_each(trash.begin(), trash.end(), remove_file_block_if); + std::for_each(to_evict.begin(), to_evict.end(), remove_file_block_if); - if (fs::exists(key_path)) { - std::error_code ec; - fs::remove_all(key_path, ec); - if (ec) { - LOG(WARNING) << ec.message(); - } + if (is_overflow()) { + return false; } } -} -void LRUFileCache::remove_if_releasable(bool is_persistent) { - /// Try remove all cached files by cache_base_path. - /// Only releasable file segments are evicted. - /// `remove_persistent_files` defines whether non-evictable by some criteria files - /// (they do not comply with the cache eviction policy) should also be removed. - - std::lock_guard cache_lock(_mutex); - LRUQueue* queue = is_persistent ? &_persistent_queue : &_queue; - for (auto it = queue->begin(); it != queue->end();) { - const auto& [key, offset, size, _] = *it++; - auto* cell = get_cell(key, is_persistent, offset, cache_lock); - - DCHECK(cell) << "Cache is in inconsistent state: LRU queue contains entries with no " - "cache cell"; - - if (cell->releasable()) { - auto file_segment = cell->file_segment; - if (file_segment) { - std::lock_guard segment_lock(file_segment->_mutex); - remove(file_segment->key(), is_persistent, file_segment->offset(), cache_lock, - segment_lock); - } - } + if (query_context) { + query_context->reserve(key, offset, size, cache_lock); } + return true; } -void LRUFileCache::remove(const Key& key, bool is_persistent, size_t offset, - std::lock_guard<std::mutex>& cache_lock, - std::lock_guard<std::mutex>& /* segment_lock */) { - LRUQueue* queue = is_persistent ? &_persistent_queue : &_queue; - auto* cell = get_cell(key, is_persistent, offset, cache_lock); - DCHECK(cell) << "No cache cell for key: " << key.to_string() << ", offset: " << offset; +void LRUFileCache::remove(FileBlockSPtr file_block, std::lock_guard<std::mutex>& cache_lock, + std::lock_guard<std::mutex>&) { + auto key = file_block->key(); + auto offset = file_block->offset(); + auto type = file_block->cache_type(); + auto* cell = get_cell(key, offset, cache_lock); + // It will be removed concurrently + if (!cell) [[unlikely]] + return; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (!cell) { [[unlikely]] return; } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org