The commit is pushed to "branch-rh9-5.14.0-284.25.1.vz9.30.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-284.25.1.vz9.30.19
------>
commit 6e85be54c23a1232da56e44bf5a43ded75fb1310
Author: Matthew Wilcox (Oracle) <wi...@infradead.org>
Date:   Thu Feb 1 14:33:16 2024 +0800
    ms/mm/swap: convert __read_swap_cache_async() to use a folio

    Remove a few hidden (and one visible) calls to compound_head().

    Link: https://lkml.kernel.org/r/20220902194653.1739778-12-wi...@infradead.org
    Signed-off-by: Matthew Wilcox (Oracle) <wi...@infradead.org>
    Signed-off-by: Andrew Morton <a...@linux-foundation.org>

    (cherry picked from commit a0d3374b070776e985bbd7b165b178fa688bf37a)

    Change: Also update the vz-specific hunk: SetPageActive -> folio_set_active.

    https://virtuozzo.atlassian.net/browse/PSBM-153264

    Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>

    Feature: fix ms/mm

    ======
    Patchset description:
    mm/swap: fix swapcache page/folio inconsistency

    This ports the switch from page to folio in add_to_swap_cache().

    We saw multiple different hangs on mmap_lock, where the task holding
    the lock was livelocked, spinning in this stack:

    +-> __x64_sys_ioctl
     +-> kvm_vcpu_ioctl
      +-> kvm_arch_vcpu_ioctl_run
       +-> vcpu_run
        +-> vcpu_enter_guest
         +-> kvm_mmu_page_fault
          +-> kvm_tdp_page_fault
           +-> kvm_faultin_pfn
            +-> __kvm_faultin_pfn
             +-> hva_to_pfn
              +-> get_user_pages_unlocked
               +-> get_user_pages_unlocked
                +-> mmap_read_lock                  # 1
                +-> __get_user_pages_locked         # 2
                 +-> for-loop                       # taken once
                  +-> __get_user_pages
                   +-> retry-loop                   # constantly spinning
                    +-> faultin_page                # return 0 to trigger retry
                     +-> handle_mm_fault
                      +-> __handle_mm_fault
                       +-> handle_pte_fault
                        +-> do_swap_page
                         +-> lookup_swap_cache      # returns non-NULL
                         +-> if (swapcache)
                          +-> if (!folio_test_swapcache || page_private(page) != entry.val)
                           +-> goto out_page
                            +-> return 0

    That can be due to an inconsistency in swapcache flag setting/reading:
    PageSwapCache reads the flag from the folio, while
    SetPageSwapCache/ClearPageSwapCache instead set and clear the flag on
    the page.

    After applying those patches SetPageSwapCache/ClearPageSwapCache
    become unused, so all paths now read this flag from the folio. With
    that in place I don't see any hangs on mmap_lock anymore (on the same
    test setup). A minimal userspace model of the mismatch is sketched
    after the patch list below.

    Matthew Wilcox (Oracle) (2):
      mm/swap: convert __read_swap_cache_async() to use a folio
      mm/swap: convert add_to_swap_cache() to take a folio
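For illustration, here is a minimal userspace model of that mismatch. This
is a sketch with made-up struct layout and helper names (struct page,
set_page_swapcache and page_swapcache below are simplified stand-ins, not
the kernel's real definitions); only the write-to-page vs read-from-folio
asymmetry mirrors the behaviour described above.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a page: a flags word plus a compound-head pointer. */
struct page {
        unsigned long flags;
        struct page *head;      /* points to self for head pages */
};

#define PG_swapcache (1UL << 0)

/* Models the old SetPageSwapCache(): sets the flag on this very page. */
static void set_page_swapcache(struct page *page)
{
        page->flags |= PG_swapcache;
}

/* Models folio-based PageSwapCache(): tests the flag on the head page. */
static bool page_swapcache(struct page *page)
{
        return page->head->flags & PG_swapcache;
}

int main(void)
{
        struct page head = { .flags = 0, .head = &head };
        struct page tail = { .flags = 0, .head = &head };

        set_page_swapcache(&tail);      /* writer marks the tail page... */

        /*
         * ...but the reader checks the head, so the flag never becomes
         * visible. Capped at 5 iterations here; the kernel's retry loop
         * has no such cap, which is why the mismatch surfaces as a
         * livelock on mmap_lock rather than a crash.
         */
        int spins = 0;
        while (!page_swapcache(&tail) && ++spins <= 5)
                printf("retry %d: flag not visible on the folio\n", spins);

        return 0;
}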
---
 mm/swap_state.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/mm/swap_state.c b/mm/swap_state.c
index a55c42e2cbc4..32395ef698d3 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -427,7 +427,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                        bool *new_page_allocated, bool activate)
 {
        struct swap_info_struct *si;
-       struct page *page;
+       struct folio *folio;
        void *shadow = NULL;
 
        *new_page_allocated = false;
@@ -442,11 +442,11 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                si = get_swap_device(entry);
                if (!si)
                        return NULL;
-               page = find_get_page(swap_address_space(entry),
-                                    swp_offset(entry));
+               folio = filemap_get_folio(swap_address_space(entry),
+                                       swp_offset(entry));
                put_swap_device(si);
-               if (page)
-                       return page;
+               if (folio)
+                       return folio_file_page(folio, swp_offset(entry));
 
                /*
                 * Just skip read ahead for unused swap slot.
@@ -464,8 +464,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                 * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will
                 * cause any racers to loop around until we add it to cache.
                 */
-               page = alloc_page_vma(gfp_mask, vma, addr);
-               if (!page)
+               folio = vma_alloc_folio(gfp_mask, 0, vma, addr, false);
+               if (!folio)
                        return NULL;
 
                /*
@@ -475,7 +475,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                if (!err)
                        break;
 
-               put_page(page);
+               folio_put(folio);
                if (err != -EEXIST)
                        return NULL;
 
@@ -493,32 +493,33 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 
        /*
         * The swap entry is ours to swap in. Prepare the new page.
         */
-       __SetPageLocked(page);
-       __SetPageSwapBacked(page);
+       __folio_set_locked(folio);
+       __folio_set_swapbacked(folio);
 
-       if (mem_cgroup_swapin_charge_page(page, NULL, gfp_mask, entry))
+       if (mem_cgroup_swapin_charge_page(&folio->page, NULL, gfp_mask, entry))
                goto fail_unlock;
 
        /* May fail (-ENOMEM) if XArray node allocation failed. */
-       if (add_to_swap_cache(page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
+       if (add_to_swap_cache(&folio->page, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
                goto fail_unlock;
 
        mem_cgroup_swapin_uncharge_swap(entry);
 
        if (shadow)
-               workingset_refault(page_folio(page), shadow);
+               workingset_refault(folio, shadow);
 
        /* Caller will initiate read into locked page */
        if (activate)
-               SetPageActive(page);
-       lru_cache_add(page);
+               folio_set_active(folio);
+       /* Caller will initiate read into locked folio */
+       folio_add_lru(folio);
        *new_page_allocated = true;
-       return page;
+       return &folio->page;
 
 fail_unlock:
-       put_swap_page(page, entry);
-       unlock_page(page);
-       put_page(page);
+       put_swap_page(&folio->page, entry);
+       folio_unlock(folio);
+       folio_put(folio);
        return NULL;
 }
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel