From: Oven Liyang <[email protected]>

If the current page fault is handled under the per-VMA lock, and that
lock was dropped only to wait for I/O to complete (e.g. via
folio_lock()), then the retry after the I/O finishes should still
qualify for the per-VMA-lock path rather than falling back to the
mmap_lock path.

To that end, filemap_fault() now reports VM_FAULT_RETRY_VMA alongside
VM_FAULT_RETRY whenever it dropped a per-VMA lock only for I/O, and each
architecture's fault handler reacts to that bit by calling
lock_vma_under_rcu() again instead of taking mmap_lock.
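
As a simplified sketch, the control flow each architecture's handler
ends up with is roughly the following (identifiers match the diffs
below; error handling, accounting and everything else in the real
handlers is elided):

	retry_vma:
		vma = lock_vma_under_rcu(mm, address);
		if (!vma)
			goto lock_mmap;

		fault = handle_mm_fault(vma, address,
					flags | FAULT_FLAG_VMA_LOCK, regs);

		/*
		 * filemap_fault() dropped the per-VMA lock only to wait
		 * for I/O and set VM_FAULT_RETRY_VMA: retry under the
		 * per-VMA lock instead of falling back to mmap_lock.
		 */
		if (fault & VM_FAULT_RETRY_VMA)
			goto retry_vma;

	lock_mmap:
		/* slow path: take mmap_lock and retry the fault */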

Cc: Russell King <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Huacai Chen <[email protected]>
Cc: WANG Xuerui <[email protected]>
Cc: Madhavan Srinivasan <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Nicholas Piggin <[email protected]>
Cc: Christophe Leroy <[email protected]>
Cc: Paul Walmsley <[email protected]>
Cc: Palmer Dabbelt <[email protected]>
Cc: Albert Ou <[email protected]>
Cc: Alexandre Ghiti <[email protected]>
Cc: Alexander Gordeev <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Sven Schnelle <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: [email protected]
Cc: H. Peter Anvin <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Lorenzo Stoakes <[email protected]>
Cc: Liam R. Howlett <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Pedro Falcato <[email protected]>
Cc: Jarkko Sakkinen <[email protected]>
Cc: Oscar Salvador <[email protected]>
Cc: Kuninori Morimoto <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Ada Couprie Diaz <[email protected]>
Cc: Robin Murphy <[email protected]>
Cc: Kristina Martšenko <[email protected]>
Cc: Kevin Brodsky <[email protected]>
Cc: Yeoreum Yun <[email protected]>
Cc: Wentao Guan <[email protected]>
Cc: Thorsten Blum <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Yunhui Cui <[email protected]>
Cc: Nam Cao <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Chris Li <[email protected]>
Cc: Kairui Song <[email protected]>
Cc: Kemeng Shi <[email protected]>
Cc: Nhat Pham <[email protected]>
Cc: Baoquan He <[email protected]>
Signed-off-by: Oven Liyang <[email protected]>
Signed-off-by: Barry Song <[email protected]>
---
 arch/arm/mm/fault.c       | 5 +++++
 arch/arm64/mm/fault.c     | 5 +++++
 arch/loongarch/mm/fault.c | 4 ++++
 arch/powerpc/mm/fault.c   | 5 ++++-
 arch/riscv/mm/fault.c     | 4 ++++
 arch/s390/mm/fault.c      | 4 ++++
 arch/x86/mm/fault.c       | 4 ++++
 include/linux/mm_types.h  | 9 +++++----
 mm/filemap.c              | 5 ++++-
 9 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 2bc828a1940c..49fc0340821c 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -313,6 +313,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
        if (!(flags & FAULT_FLAG_USER))
                goto lock_mmap;
 
+retry_vma:
        vma = lock_vma_under_rcu(mm, addr);
        if (!vma)
                goto lock_mmap;
@@ -342,6 +343,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
                        goto no_context;
                return 0;
        }
+
+       /* The first attempt dropped the per-VMA lock only to wait for I/O */
+       if (fault & VM_FAULT_RETRY_VMA)
+               goto retry_vma;
 lock_mmap:
 
 retry:
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 125dfa6c613b..842f50b99d3e 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -622,6 +622,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
        if (!(mm_flags & FAULT_FLAG_USER))
                goto lock_mmap;
 
+retry_vma:
        vma = lock_vma_under_rcu(mm, addr);
        if (!vma)
                goto lock_mmap;
@@ -668,6 +669,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
                        goto no_context;
                return 0;
        }
+
+       /* The first attempt dropped the per-VMA lock only to wait for I/O */
+       if (fault & VM_FAULT_RETRY_VMA)
+               goto retry_vma;
 lock_mmap:
 
 retry:
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index 2c93d33356e5..738f495560c0 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -219,6 +219,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
        if (!(flags & FAULT_FLAG_USER))
                goto lock_mmap;
 
+retry_vma:
        vma = lock_vma_under_rcu(mm, address);
        if (!vma)
                goto lock_mmap;
@@ -265,6 +266,9 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
                        no_context(regs, write, address);
                return;
        }
+       /* The first attempt dropped the per-VMA lock only to wait for I/O */
+       if (fault & VM_FAULT_RETRY_VMA)
+               goto retry_vma;
 lock_mmap:
 
 retry:
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 806c74e0d5ab..cb7ffc20c760 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -487,6 +487,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
        if (!(flags & FAULT_FLAG_USER))
                goto lock_mmap;
 
+retry_vma:
        vma = lock_vma_under_rcu(mm, address);
        if (!vma)
                goto lock_mmap;
@@ -516,7 +517,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 
        if (fault_signal_pending(fault, regs))
                return user_mode(regs) ? 0 : SIGBUS;
-
+       /* The first attempt dropped the per-VMA lock only to wait for I/O */
+       if (fault & VM_FAULT_RETRY_VMA)
+               goto retry_vma;
 lock_mmap:
 
        /* When running in the kernel we expect faults to occur only to
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 04ed6f8acae4..b94cf57c2b9a 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -347,6 +347,7 @@ void handle_page_fault(struct pt_regs *regs)
        if (!(flags & FAULT_FLAG_USER))
                goto lock_mmap;
 
+retry_vma:
        vma = lock_vma_under_rcu(mm, addr);
        if (!vma)
                goto lock_mmap;
@@ -376,6 +377,9 @@ void handle_page_fault(struct pt_regs *regs)
                        no_context(regs, addr);
                return;
        }
+       /* The first attempt dropped the per-VMA lock only to wait for I/O */
+       if (fault & VM_FAULT_RETRY_VMA)
+               goto retry_vma;
 lock_mmap:
 
 retry:
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index e1ad05bfd28a..8d91c6495e13 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -286,6 +286,7 @@ static void do_exception(struct pt_regs *regs, int access)
                flags |= FAULT_FLAG_WRITE;
        if (!(flags & FAULT_FLAG_USER))
                goto lock_mmap;
+retry_vma:
        vma = lock_vma_under_rcu(mm, address);
        if (!vma)
                goto lock_mmap;
@@ -310,6 +311,9 @@ static void do_exception(struct pt_regs *regs, int access)
                        handle_fault_error_nolock(regs, 0);
                return;
        }
+       /* The first attempt dropped the per-VMA lock only to wait for I/O */
+       if (fault & VM_FAULT_RETRY_VMA)
+               goto retry_vma;
 lock_mmap:
 retry:
        vma = lock_mm_and_find_vma(mm, address, regs);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 998bd807fc7b..6023d0083903 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1324,6 +1324,7 @@ void do_user_addr_fault(struct pt_regs *regs,
        if (!(flags & FAULT_FLAG_USER))
                goto lock_mmap;
 
+retry_vma:
        vma = lock_vma_under_rcu(mm, address);
        if (!vma)
                goto lock_mmap;
@@ -1353,6 +1354,9 @@ void do_user_addr_fault(struct pt_regs *regs,
                                                 ARCH_DEFAULT_PKEY);
                return;
        }
+       /* The first attempt dropped the per-VMA lock only to wait for I/O */
+       if (fault & VM_FAULT_RETRY_VMA)
+               goto retry_vma;
 lock_mmap:
 
 retry:
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b71625378ce3..12b2d65ef1b9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1670,10 +1670,11 @@ enum vm_fault_reason {
        VM_FAULT_NOPAGE         = (__force vm_fault_t)0x000100,
        VM_FAULT_LOCKED         = (__force vm_fault_t)0x000200,
        VM_FAULT_RETRY          = (__force vm_fault_t)0x000400,
-       VM_FAULT_FALLBACK       = (__force vm_fault_t)0x000800,
-       VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
-       VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
-       VM_FAULT_COMPLETED      = (__force vm_fault_t)0x004000,
+       VM_FAULT_RETRY_VMA      = (__force vm_fault_t)0x000800,
+       VM_FAULT_FALLBACK       = (__force vm_fault_t)0x001000,
+       VM_FAULT_DONE_COW       = (__force vm_fault_t)0x002000,
+       VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x004000,
+       VM_FAULT_COMPLETED      = (__force vm_fault_t)0x008000,
        VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
 };
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 7d15a9c216ef..57dfd2211109 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3464,6 +3464,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
        struct folio *folio;
        vm_fault_t ret = 0;
        bool mapping_locked = false;
+       bool retry_by_vma_lock = false;
 
        max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
        if (unlikely(index >= max_idx))
@@ -3560,6 +3561,8 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
         */
        if (fpin) {
                folio_unlock(folio);
+               if (vmf->flags & FAULT_FLAG_VMA_LOCK)
+                       retry_by_vma_lock = true;
                goto out_retry;
        }
        if (mapping_locked)
@@ -3610,7 +3613,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
                filemap_invalidate_unlock_shared(mapping);
        if (fpin)
                fput(fpin);
-       return ret | VM_FAULT_RETRY;
+       return ret | VM_FAULT_RETRY | (retry_by_vma_lock ? VM_FAULT_RETRY_VMA : 0);
 }
 EXPORT_SYMBOL(filemap_fault);
 
-- 
2.39.3 (Apple Git-146)

