From: Oven Liyang <[email protected]>

If the current page fault is using the per-VMA lock, and we only
released the lock to wait for I/O completion (e.g., using
folio_lock()), then when the fault is retried after the I/O completes,
it should still qualify for the per-VMA-lock path.
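For illustration, the control flow each arch handler gains looks like
the sketch below (a minimal sketch modeled on the arm64 hunk in this
patch; the vm_flags check, accounting and error paths are elided, and
fault_sketch() is an illustrative name only, not a function added by
this patch):

	static void fault_sketch(struct mm_struct *mm, unsigned long addr,
				 unsigned int mm_flags, struct pt_regs *regs)
	{
		struct vm_area_struct *vma;
		vm_fault_t fault;

	retry_vma:
		vma = lock_vma_under_rcu(mm, addr);
		if (!vma)
			goto lock_mmap;

		fault = handle_mm_fault(vma, addr,
					mm_flags | FAULT_FLAG_VMA_LOCK, regs);
		/* On RETRY/COMPLETED the VMA lock was already dropped inside */
		if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
			vma_end_read(vma);

		if (!(fault & VM_FAULT_RETRY))
			return;		/* handled (or failed) under the VMA lock */

		/* The retry only waited for I/O: stay on the per-VMA-lock path */
		if (fault & VM_FAULT_RETRY_VMA)
			goto retry_vma;

	lock_mmap:
		/* fall back to the mmap_lock slow path (unchanged by this patch) */
		return;
	}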
Cc: Russell King <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Huacai Chen <[email protected]>
Cc: WANG Xuerui <[email protected]>
Cc: Madhavan Srinivasan <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Nicholas Piggin <[email protected]>
Cc: Christophe Leroy <[email protected]>
Cc: Paul Walmsley <[email protected]>
Cc: Palmer Dabbelt <[email protected]>
Cc: Albert Ou <[email protected]>
Cc: Alexandre Ghiti <[email protected]>
Cc: Alexander Gordeev <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Sven Schnelle <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: H. Peter Anvin <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Lorenzo Stoakes <[email protected]>
Cc: Liam R. Howlett <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Pedro Falcato <[email protected]>
Cc: Jarkko Sakkinen <[email protected]>
Cc: Oscar Salvador <[email protected]>
Cc: Kuninori Morimoto <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Ada Couprie Diaz <[email protected]>
Cc: Robin Murphy <[email protected]>
Cc: Kristina Martšenko <[email protected]>
Cc: Kevin Brodsky <[email protected]>
Cc: Yeoreum Yun <[email protected]>
Cc: Wentao Guan <[email protected]>
Cc: Thorsten Blum <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Yunhui Cui <[email protected]>
Cc: Nam Cao <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Chris Li <[email protected]>
Cc: Kairui Song <[email protected]>
Cc: Kemeng Shi <[email protected]>
Cc: Nhat Pham <[email protected]>
Cc: Baoquan He <[email protected]>
Signed-off-by: Oven Liyang <[email protected]>
Signed-off-by: Barry Song <[email protected]>
---
 arch/arm/mm/fault.c       | 5 +++++
 arch/arm64/mm/fault.c     | 5 +++++
 arch/loongarch/mm/fault.c | 4 ++++
 arch/powerpc/mm/fault.c   | 5 ++++-
 arch/riscv/mm/fault.c     | 4 ++++
 arch/s390/mm/fault.c      | 4 ++++
 arch/x86/mm/fault.c       | 4 ++++
 include/linux/mm_types.h  | 9 +++++----
 mm/filemap.c              | 5 ++++-
 9 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 2bc828a1940c..49fc0340821c 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -313,6 +313,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	if (!(flags & FAULT_FLAG_USER))
 		goto lock_mmap;
 
+retry_vma:
 	vma = lock_vma_under_rcu(mm, addr);
 	if (!vma)
 		goto lock_mmap;
@@ -342,6 +343,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 			goto no_context;
 		return 0;
 	}
+
+	/* The first try only waited for I/O; retry the per-VMA-lock path */
+	if (fault & VM_FAULT_RETRY_VMA)
+		goto retry_vma;
 lock_mmap:
 
 retry:
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 125dfa6c613b..842f50b99d3e 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -622,6 +622,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	if (!(mm_flags & FAULT_FLAG_USER))
 		goto lock_mmap;
 
+retry_vma:
 	vma = lock_vma_under_rcu(mm, addr);
 	if (!vma)
 		goto lock_mmap;
@@ -668,6 +669,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 			goto no_context;
 		return 0;
 	}
+
+	/* The first try only waited for I/O; retry the per-VMA-lock path */
+	if (fault & VM_FAULT_RETRY_VMA)
+		goto retry_vma;
 lock_mmap:
 
 retry:
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index 2c93d33356e5..738f495560c0 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -219,6 +219,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
 	if (!(flags & FAULT_FLAG_USER))
 		goto lock_mmap;
 
+retry_vma:
 	vma = lock_vma_under_rcu(mm, address);
 	if (!vma)
 		goto lock_mmap;
@@ -265,6 +266,9 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
 			no_context(regs, write, address);
 		return;
 	}
+	/* The first try only waited for I/O; retry the per-VMA-lock path */
+	if (fault & VM_FAULT_RETRY_VMA)
+		goto retry_vma;
 lock_mmap:
 
 retry:
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 806c74e0d5ab..cb7ffc20c760 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -487,6 +487,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 	if (!(flags & FAULT_FLAG_USER))
 		goto lock_mmap;
 
+retry_vma:
 	vma = lock_vma_under_rcu(mm, address);
 	if (!vma)
 		goto lock_mmap;
@@ -516,7 +517,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 
 	if (fault_signal_pending(fault, regs))
 		return user_mode(regs) ? 0 : SIGBUS;
-
+	/* The first try only waited for I/O; retry the per-VMA-lock path */
+	if (fault & VM_FAULT_RETRY_VMA)
+		goto retry_vma;
 lock_mmap:
 
 	/* When running in the kernel we expect faults to occur only to
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 04ed6f8acae4..b94cf57c2b9a 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -347,6 +347,7 @@ void handle_page_fault(struct pt_regs *regs)
 	if (!(flags & FAULT_FLAG_USER))
 		goto lock_mmap;
 
+retry_vma:
 	vma = lock_vma_under_rcu(mm, addr);
 	if (!vma)
 		goto lock_mmap;
@@ -376,6 +377,9 @@ void handle_page_fault(struct pt_regs *regs)
 			no_context(regs, addr);
 		return;
 	}
+	/* The first try only waited for I/O; retry the per-VMA-lock path */
+	if (fault & VM_FAULT_RETRY_VMA)
+		goto retry_vma;
 lock_mmap:
 
 retry:
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index e1ad05bfd28a..8d91c6495e13 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -286,6 +286,7 @@ static void do_exception(struct pt_regs *regs, int access)
 		flags |= FAULT_FLAG_WRITE;
 	if (!(flags & FAULT_FLAG_USER))
 		goto lock_mmap;
+retry_vma:
 	vma = lock_vma_under_rcu(mm, address);
 	if (!vma)
 		goto lock_mmap;
@@ -310,6 +311,9 @@ static void do_exception(struct pt_regs *regs, int access)
 			handle_fault_error_nolock(regs, 0);
 		return;
 	}
+	/* The first try only waited for I/O; retry the per-VMA-lock path */
+	if (fault & VM_FAULT_RETRY_VMA)
+		goto retry_vma;
 lock_mmap:
 retry:
 	vma = lock_mm_and_find_vma(mm, address, regs);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 998bd807fc7b..6023d0083903 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1324,6 +1324,7 @@ void do_user_addr_fault(struct pt_regs *regs,
 	if (!(flags & FAULT_FLAG_USER))
 		goto lock_mmap;
 
+retry_vma:
 	vma = lock_vma_under_rcu(mm, address);
 	if (!vma)
 		goto lock_mmap;
@@ -1353,6 +1354,9 @@ void do_user_addr_fault(struct pt_regs *regs,
 						 ARCH_DEFAULT_PKEY);
 		return;
 	}
+	/* The first try only waited for I/O; retry the per-VMA-lock path */
+	if (fault & VM_FAULT_RETRY_VMA)
+		goto retry_vma;
 lock_mmap:
 
 retry:
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b71625378ce3..12b2d65ef1b9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1670,10 +1670,11 @@ enum vm_fault_reason {
 	VM_FAULT_NOPAGE         = (__force vm_fault_t)0x000100,
 	VM_FAULT_LOCKED         = (__force vm_fault_t)0x000200,
 	VM_FAULT_RETRY          = (__force vm_fault_t)0x000400,
-	VM_FAULT_FALLBACK       = (__force vm_fault_t)0x000800,
-	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
-	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
-	VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
+	VM_FAULT_RETRY_VMA      = (__force vm_fault_t)0x000800,
+	VM_FAULT_FALLBACK       = (__force vm_fault_t)0x001000,
+	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x002000,
+	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x004000,
+	VM_FAULT_COMPLETED      = (__force vm_fault_t)0x008000,
 	VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
 };
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 7d15a9c216ef..57dfd2211109 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3464,6 +3464,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 	struct folio *folio;
 	vm_fault_t ret = 0;
 	bool mapping_locked = false;
+	bool retry_by_vma_lock = false;
 
 	max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 	if (unlikely(index >= max_idx))
@@ -3560,6 +3561,8 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 	 */
 	if (fpin) {
 		folio_unlock(folio);
+		if (vmf->flags & FAULT_FLAG_VMA_LOCK)
+			retry_by_vma_lock = true;
 		goto out_retry;
 	}
 	if (mapping_locked)
@@ -3610,7 +3613,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 		filemap_invalidate_unlock_shared(mapping);
 	if (fpin)
 		fput(fpin);
-	return ret | VM_FAULT_RETRY;
+	return ret | VM_FAULT_RETRY | (retry_by_vma_lock ? VM_FAULT_RETRY_VMA : 0);
 }
 EXPORT_SYMBOL(filemap_fault);
 
-- 
2.39.3 (Apple Git-146)
