The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=88c8cba0439599c00d9d3d586a66a035004e6e84

commit 88c8cba0439599c00d9d3d586a66a035004e6e84
Author:     Alan Cox <a...@freebsd.org>
AuthorDate: 2025-06-30 07:36:12 +0000
Commit:     Alan Cox <a...@freebsd.org>
CommitDate: 2025-07-06 17:08:37 +0000

    amd64 pmap: preallocate another page table page in pmap_demote_DMAP()
    
    To avoid a possible panic in pmap_demote_DMAP(), preallocate the page
    table page that may be needed by pmap_demote_pde() before acquiring the
    kernel pmap lock, so that we can wait until the allocation succeeds.
    
    Reviewed by:    kib
    MFC after:      1 week
    Differential Revision:  https://reviews.freebsd.org/D51091
---
 sys/amd64/amd64/pmap.c | 121 +++++++++++++++++++++++++++++--------------------
 1 file changed, 73 insertions(+), 48 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 0044f27729f6..cae5436a1ff2 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1301,6 +1301,8 @@ static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
 static bool    pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static bool    pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde,
     vm_offset_t va, struct rwlock **lockp);
+static bool    pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde,
+    vm_offset_t va, struct rwlock **lockp, vm_page_t mpte);
 static bool    pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
     vm_offset_t va, vm_page_t m);
 static int     pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
@@ -6010,12 +6012,18 @@ pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
 static bool
 pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
     struct rwlock **lockp)
+{
+       return (pmap_demote_pde_mpte(pmap, pde, va, lockp, NULL));
+}
+
+static bool
+pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+    struct rwlock **lockp, vm_page_t mpte)
 {
        pd_entry_t newpde, oldpde;
        pt_entry_t *firstpte, newpte;
        pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
        vm_paddr_t mptepa;
-       vm_page_t mpte;
        int PG_PTE_CACHE;
        bool in_kernel;
 
@@ -6028,61 +6036,65 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
        PG_PKU_MASK = pmap_pku_mask_bit(pmap);
 
        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-       in_kernel = va >= VM_MAXUSER_ADDRESS;
        oldpde = *pde;
        KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
            ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
-
-       /*
-        * Invalidate the 2MB page mapping and return "failure" if the
-        * mapping was never accessed.
-        */
-       if ((oldpde & PG_A) == 0) {
-               KASSERT((oldpde & PG_W) == 0,
-                   ("pmap_demote_pde: a wired mapping is missing PG_A"));
-               pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp);
-               return (false);
-       }
-
-       mpte = pmap_remove_pt_page(pmap, va);
+       KASSERT((oldpde & PG_MANAGED) == 0 || lockp != NULL,
+           ("pmap_demote_pde: lockp for a managed mapping is NULL"));
+       in_kernel = va >= VM_MAXUSER_ADDRESS;
        if (mpte == NULL) {
-               KASSERT((oldpde & PG_W) == 0,
-                   ("pmap_demote_pde: page table page for a wired mapping"
-                   " is missing"));
-
                /*
-                * If the page table page is missing and the mapping
-                * is for a kernel address, the mapping must belong to
-                * the direct map.  Page table pages are preallocated
-                * for every other part of the kernel address space,
-                * so the direct map region is the only part of the
-                * kernel address space that must be handled here.
+                * Invalidate the 2MB page mapping and return "failure" if the
+                * mapping was never accessed.
                 */
-               KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS &&
-                   va < DMAP_MAX_ADDRESS),
-                   ("pmap_demote_pde: No saved mpte for va %#lx", va));
-
-               /*
-                * If the 2MB page mapping belongs to the direct map
-                * region of the kernel's address space, then the page
-                * allocation request specifies the highest possible
-                * priority (VM_ALLOC_INTERRUPT).  Otherwise, the
-                * priority is normal.
-                */
-               mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va),
-                   (in_kernel ? VM_ALLOC_INTERRUPT : 0) | VM_ALLOC_WIRED);
-
-               /*
-                * If the allocation of the new page table page fails,
-                * invalidate the 2MB page mapping and return "failure".
-                */
-               if (mpte == NULL) {
+               if ((oldpde & PG_A) == 0) {
+                       KASSERT((oldpde & PG_W) == 0,
+                   ("pmap_demote_pde: a wired mapping is missing PG_A"));
                        pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp);
                        return (false);
                }
 
-               if (!in_kernel)
-                       mpte->ref_count = NPTEPG;
+               mpte = pmap_remove_pt_page(pmap, va);
+               if (mpte == NULL) {
+                       KASSERT((oldpde & PG_W) == 0,
+    ("pmap_demote_pde: page table page for a wired mapping is missing"));
+
+                       /*
+                        * If the page table page is missing and the mapping
+                        * is for a kernel address, the mapping must belong to
+                        * the direct map.  Page table pages are preallocated
+                        * for every other part of the kernel address space,
+                        * so the direct map region is the only part of the
+                        * kernel address space that must be handled here.
+                        */
+                       KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS &&
+                           va < DMAP_MAX_ADDRESS),
+                           ("pmap_demote_pde: No saved mpte for va %#lx", va));
+
+                       /*
+                        * If the 2MB page mapping belongs to the direct map
+                        * region of the kernel's address space, then the page
+                        * allocation request specifies the highest possible
+                        * priority (VM_ALLOC_INTERRUPT).  Otherwise, the
+                        * priority is normal.
+                        */
+                       mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va),
+                           (in_kernel ? VM_ALLOC_INTERRUPT : 0) |
+                           VM_ALLOC_WIRED);
+
+                       /*
+                        * If the allocation of the new page table page fails,
+                        * invalidate the 2MB page mapping and return "failure".
+                        */
+                       if (mpte == NULL) {
+                               pmap_demote_pde_abort(pmap, va, pde, oldpde,
+                                   lockp);
+                               return (false);
+                       }
+
+                       if (!in_kernel)
+                               mpte->ref_count = NPTEPG;
+               }
        }
        mptepa = VM_PAGE_TO_PHYS(mpte);
        firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
@@ -9977,8 +9989,8 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate)
 {
        pdp_entry_t *pdpe;
        pd_entry_t *pde;
-       vm_page_t m;
        vm_offset_t va;
+       vm_page_t m, mpte;
        bool changed;
 
        if (len == 0)
@@ -9998,6 +10010,11 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate)
                 * x86_mr_split_dmap() function.
                 */
                m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK);
+               if (len < NBPDR) {
+                       mpte = vm_page_alloc_noobj(VM_ALLOC_WIRED |
+                           VM_ALLOC_WAITOK);
+               } else
+                       mpte = NULL;
 
                PMAP_LOCK(kernel_pmap);
                pdpe = pmap_pdpe(kernel_pmap, va);
@@ -10014,9 +10031,13 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate)
                        if ((*pde & X86_PG_V) == 0)
                                panic("pmap_demote_DMAP: invalid PDE");
                        if ((*pde & PG_PS) != 0) {
-                               if (!pmap_demote_pde(kernel_pmap, pde, va))
+                               mpte->pindex = pmap_pde_pindex(va);
+                               pmap_pt_page_count_adj(kernel_pmap, 1);
+                               if (!pmap_demote_pde_mpte(kernel_pmap, pde, va,
+                                   NULL, mpte))
                                        panic("pmap_demote_DMAP: PDE failed");
                                changed = true;
+                               mpte = NULL;
                        }
                }
                if (changed && invalidate)
@@ -10026,6 +10047,10 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate)
                        vm_page_unwire_noq(m);
                        vm_page_free(m);
                }
+               if (mpte != NULL) {
+                       vm_page_unwire_noq(mpte);
+                       vm_page_free(mpte);
+               }
        }
 }
 

Reply via email to