git: 5ee5c40402c9 - main - arm64 pmap: Defer bti lookup
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=5ee5c40402c92a498ed8d6eeb6cf0b5c1680817b commit 5ee5c40402c92a498ed8d6eeb6cf0b5c1680817b Author: Alan Cox AuthorDate: 2024-06-07 05:23:59 + Commit: Alan Cox CommitDate: 2024-06-08 07:26:55 + arm64 pmap: Defer bti lookup Defer the bti lookup until after page table page allocation is complete. We sometimes release the pmap lock and sleep during page table page allocation. Consequently, the result of a bti lookup from before page table page allocation could be stale when we finally create the mapping based on it. Modify pmap_bti_same() to update the prototype PTE at the same time as checking the address range. This eliminates the need for calling pmap_pte_bti() in addition to pmap_bti_same(). pmap_bti_same() was already doing most of the work of pmap_pte_bti(). Reviewed by:markj Differential Revision: https://reviews.freebsd.org/D45502 --- sys/arm64/arm64/pmap.c | 73 ++ 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 92c1c824ba4e..7b30b2a6ae37 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -508,7 +508,8 @@ static void pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); static uma_zone_t pmap_bti_ranges_zone; -static bool pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva); +static bool pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, +pt_entry_t *pte); static pt_entry_t pmap_pte_bti(pmap_t pmap, vm_offset_t va); static void pmap_bti_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva); static void *bti_dup_range(void *ctx, void *data); @@ -4955,21 +4956,22 @@ set_l3: #endif /* VM_NRESERVLEVEL > 0 */ static int -pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, +pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t pte, int flags, int psind) { - pd_entry_t *l0p, *l1p, *l2p, origpte; + pd_entry_t *l0p, *l1p, *l2p, newpte, origpte; vm_page_t mp; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(psind > 0 && psind < MAXPAGESIZES, ("psind %d unexpected", psind)); - KASSERT((PTE_TO_PHYS(newpte) & (pagesizes[psind] - 1)) == 0, - ("unaligned phys address %#lx newpte %#lx psind %d", - PTE_TO_PHYS(newpte), newpte, psind)); + KASSERT((PTE_TO_PHYS(pte) & (pagesizes[psind] - 1)) == 0, + ("unaligned phys address %#lx pte %#lx psind %d", + PTE_TO_PHYS(pte), pte, psind)); restart: - if (!pmap_bti_same(pmap, va, va + pagesizes[psind])) + newpte = pte; + if (!pmap_bti_same(pmap, va, va + pagesizes[psind], &newpte)) return (KERN_PROTECTION_FAILURE); if (psind == 2) { PMAP_ASSERT_L1_BLOCKS_SUPPORTED; @@ -5123,9 +5125,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, lock = NULL; PMAP_LOCK(pmap); - /* Wait until we lock the pmap to protect the bti rangeset */ - new_l3 |= pmap_pte_bti(pmap, va); - if ((flags & PMAP_ENTER_LARGEPAGE) != 0) { KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed largepage va %#lx flags %#x", va, flags)); @@ -5197,6 +5196,7 @@ havel3: orig_l3 = pmap_load(l3); opa = PTE_TO_PHYS(orig_l3); pv = NULL; + new_l3 |= pmap_pte_bti(pmap, va); /* * Is the specified virtual address already mapped? 
@@ -5405,7 +5405,6 @@ pmap_enter_l2_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, new_l2 = (pd_entry_t)(VM_PAGE_TO_PTE(m) | ATTR_DEFAULT | ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) | L2_BLOCK); - new_l2 |= pmap_pte_bti(pmap, va); if ((m->oflags & VPO_UNMANAGED) == 0) { new_l2 |= ATTR_SW_MANAGED; new_l2 &= ~ATTR_AF; @@ -5478,7 +5477,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, * and let vm_fault() cope. Check after l2 allocation, since * it could sleep. */ - if (!pmap_bti_same(pmap, va, va + L2_SIZE)) { + if (!pmap_bti_same(pmap, va, va + L2_SIZE, &new_l2)) { KASSERT(l2pg != NULL, ("pmap_enter_l2: missing L2 PTP")); pmap_abort_ptp(pmap, va, l2pg); return (KERN_PROTECTION_FAILURE); @@ -5633,7 +5632,6 @@ pmap_enter_l3c_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *ml3p, l3e = VM_PAGE_TO_PTE(m) | ATTR_DEFAULT | ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S
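As an illustration of the hazard being fixed (not part of the commit), here is a minimal userspace model, with made-up names and pthread locking standing in for the pmap lock: a value looked up before an operation that may drop the lock and sleep can be stale by the time it is used, so the lookup is deferred until afterwards.

#include <pthread.h>

struct pmap_model {
	pthread_mutex_t lock;
	unsigned long bti_bits;		/* stand-in for the BTI rangeset lookup */
};

/* Page table page allocation may drop the lock and sleep. */
static void
alloc_ptp_may_sleep(struct pmap_model *pm)
{
	pthread_mutex_unlock(&pm->lock);
	/* Another thread may change pm->bti_bits here. */
	pthread_mutex_lock(&pm->lock);
}

static unsigned long
enter_mapping(struct pmap_model *pm, unsigned long proto_pte)
{
	pthread_mutex_lock(&pm->lock);
	alloc_ptp_may_sleep(pm);
	/* Look up the BTI bits only now, after the allocation is complete. */
	proto_pte |= pm->bti_bits;
	pthread_mutex_unlock(&pm->lock);
	return (proto_pte);
}

int
main(void)
{
	struct pmap_model pm;

	pthread_mutex_init(&pm.lock, NULL);
	pm.bti_bits = 0x1;
	return (enter_mapping(&pm, 0) == 0x1 ? 0 : 1);
}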
git: 268f19aacc6a - main - vm: Reduce address space fragmentation
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=268f19aacc6af8f64c438e8515213023a2e66ed7 commit 268f19aacc6af8f64c438e8515213023a2e66ed7 Author: Alan Cox AuthorDate: 2024-06-09 16:58:27 + Commit: Alan Cox CommitDate: 2024-06-13 20:13:45 + vm: Reduce address space fragmentation jemalloc performs two types of virtual memory allocations: (1) large chunks of virtual memory, where the chunk size is a multiple of a superpage and explicitly aligned, and (2) small allocations, mostly 128KB, where no alignment is requested. Typically, it starts with a small allocation, and over time it makes both types of allocation. With anon_loc being updated on every allocation, we wind up with a repeating pattern of a small allocation, a large gap, and a large, aligned allocation. (As an aside, we wind up allocating a reservation for these small allocations, but it will never fill because the next large, aligned allocation updates anon_loc, leaving a gap that will never be filled with other small allocations.) With this change, anon_loc isn't updated on every allocation. So, the small allocations will be clustered together, the large allocations will be clustered together, and there will be fewer gaps between the anonymous memory allocations. In addition, I see a small reduction in reservations allocated (e.g., 1.6% during buildworld), fewer partially populated reservations, and a small increase in 64KB page promotions on arm64. Reviewed by:kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D39845 --- sys/vm/vm_map.c | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 3c7afcb6642f..fa71bb8a01d6 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -2247,8 +2247,15 @@ again: rv = vm_map_insert(map, object, offset, *addr, *addr + length, prot, max, cow); } - if (rv == KERN_SUCCESS && update_anon) - map->anon_loc = *addr + length; + + /* +* Update the starting address for clustered anonymous memory mappings +* if a starting address was not previously defined or an ASLR restart +* placed an anonymous memory mapping at a lower address. +*/ + if (update_anon && rv == KERN_SUCCESS && (map->anon_loc == 0 || + *addr < map->anon_loc)) + map->anon_loc = *addr; done: vm_map_unlock(map); return (rv); @@ -4041,9 +4048,6 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) entry->object.vm_object != NULL) pmap_map_delete(map->pmap, entry->start, entry->end); - if (entry->end == map->anon_loc) - map->anon_loc = entry->start; - /* * Delete the entry only after removing all pmap * entries pointing to its pages. (Otherwise, its
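Restated as a tiny standalone helper (illustrative only; simplified types, not the vm_map.c code itself), the new update rule applied after a successful anonymous mapping is:

#include <stdint.h>

typedef uint64_t vm_offset_t;

/*
 * Move the clustering hint only when it is unset or when the new
 * anonymous mapping landed below it.  Unlike the old "*addr + length"
 * rule, this keeps later allocations packed near existing ones.
 */
void
update_anon_loc(vm_offset_t *anon_loc, vm_offset_t addr)
{
	if (*anon_loc == 0 || addr < *anon_loc)
		*anon_loc = addr;
}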
git: 383fd3ea0012 - main - arm64: Handle an unaligned start in pmap_mask_set_locked()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=383fd3ea00128cf65fbea0e4cbdb9849945c854b commit 383fd3ea00128cf65fbea0e4cbdb9849945c854b Author: Alan Cox AuthorDate: 2024-07-03 05:15:35 + Commit: Alan Cox CommitDate: 2024-07-05 05:42:52 + arm64: Handle an unaligned start in pmap_mask_set_locked() In pmap_mask_set_locked(), correctly handle a starting address that is in the middle of an L3C page. The symptoms arising from this error included assertion failures in pmap_demote_l3c(). Reported by:andrew Reviewed by:markj Fixes: fd6cb031f577 "arm64 pmap: Add ATTR_CONTIGUOUS support [Part 1]" Differential Revision: https://reviews.freebsd.org/D45851 --- sys/arm64/arm64/pmap.c | 18 -- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index f4a46823428a..a9cb8c7fe468 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -4403,8 +4403,22 @@ pmap_mask_set_locked(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t m va = va_next; } if ((l3 & ATTR_CONTIGUOUS) != 0) { - l3p += L3C_ENTRIES - 1; - sva += L3C_SIZE - L3_SIZE; + /* +* Does this L3C page extend beyond +* the requested range? Handle the +* possibility that "va_next" is zero. +*/ + if ((sva | L3C_OFFSET) > va_next - 1) + break; + + /* +* Skip ahead to the last L3_PAGE +* within this L3C page. +*/ + l3p = (pt_entry_t *)((uintptr_t)l3p | + ((L3C_ENTRIES - 1) * + sizeof(pt_entry_t))); + sva |= L3C_SIZE - L3_SIZE; } continue; }
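The two pieces of arithmetic are easier to see with concrete numbers; a small userspace demonstration (illustrative constants for a 4KB base page size, where an L3C page is 64KB; not the kernel code itself):

#include <assert.h>
#include <stdint.h>

#define	L3_SIZE		0x1000UL	/* 4KB base page */
#define	L3C_SIZE	0x10000UL	/* 64KB L3C page */
#define	L3C_OFFSET	(L3C_SIZE - 1)

int
main(void)
{
	uint64_t sva, va_next;

	/* A start address in the middle of an L3C page (its 3rd base page). */
	sva = 0x10002000UL;

	/*
	 * The old "sva += L3C_SIZE - L3_SIZE" overshoots into the next L3C
	 * page; OR-ing instead lands on the last base page of the current
	 * L3C page no matter where sva started within it.
	 */
	assert(sva + (L3C_SIZE - L3_SIZE) == 0x10011000UL);	/* overshoot */
	assert((sva | (L3C_SIZE - L3_SIZE)) == 0x1000f000UL);	/* in range */

	/*
	 * The "extends beyond the range" test is written against
	 * "va_next - 1" so that va_next == 0 (the range runs to the top of
	 * the address space) wraps to UINT64_MAX and the test stays false.
	 */
	va_next = 0;
	assert(!((sva | L3C_OFFSET) > va_next - 1));
	return (0);
}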
git: fb32ba6aa44d - main - amd64/arm64: Eliminate unnecessary demotions in pmap_protect()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=fb32ba6aa44dc86e70ad06b44f93a9709e78f3d1 commit fb32ba6aa44dc86e70ad06b44f93a9709e78f3d1 Author: Alan Cox AuthorDate: 2024-07-05 18:20:01 + Commit: Alan Cox CommitDate: 2024-07-06 20:48:10 + amd64/arm64: Eliminate unnecessary demotions in pmap_protect() In pmap_protect(), when the mapping isn't changing, we don't need to perform a superpage demotion, even though the requested change doesn't cover the entire superpage. Reviewed by:kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D45886 --- sys/amd64/amd64/pmap.c | 21 + sys/arm64/arm64/pmap.c | 3 ++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 57943e815b5b..2bcf671be243 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -6796,8 +6796,7 @@ retry_pdpe: */ if ((ptpaddr & PG_PS) != 0) { /* -* Are we protecting the entire large page? If not, -* demote the mapping and fall through. +* Are we protecting the entire large page? */ if (sva + NBPDR == va_next && eva >= va_next) { /* @@ -6807,9 +6806,23 @@ retry_pdpe: if (pmap_protect_pde(pmap, pde, sva, prot)) anychanged = true; continue; - } else if (!pmap_demote_pde(pmap, pde, sva)) { + } + + /* +* Does the large page mapping need to change? If so, +* demote it and fall through. +*/ + pbits = ptpaddr; + if ((prot & VM_PROT_WRITE) == 0) + pbits &= ~(PG_RW | PG_M); + if ((prot & VM_PROT_EXECUTE) == 0) + pbits |= pg_nx; + if (ptpaddr == pbits || !pmap_demote_pde(pmap, pde, + sva)) { /* -* The large page mapping was destroyed. +* Either the large page mapping doesn't need +* to change, or it was destroyed during +* demotion. */ continue; } diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index a9cb8c7fe468..29552f722aa4 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -4373,7 +4373,8 @@ pmap_mask_set_locked(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t m if (sva + L2_SIZE == va_next && eva >= va_next) { pmap_protect_l2(pmap, l2, sva, mask, nbits); continue; - } else if (pmap_demote_l2(pmap, l2, sva) == NULL) + } else if ((pmap_load(l2) & mask) == nbits || + pmap_demote_l2(pmap, l2, sva) == NULL) continue; } KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
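The amd64 hunk boils down to a "would anything actually change?" test before demoting; a self-contained illustration (PTE bit values chosen for illustration, simplified from the real pmap constants):

#include <assert.h>
#include <stdint.h>

#define	PG_RW	0x002UL			/* illustrative x86-style PTE bits */
#define	PG_M	0x040UL
#define	PG_NX	(1UL << 63)

int
main(void)
{
	uint64_t ptpaddr, pbits;

	/* A 2MB mapping that is already read-only and non-executable. */
	ptpaddr = 0x40200000UL | PG_NX;

	/* What pmap_protect(..., VM_PROT_READ) would install. */
	pbits = ptpaddr;
	pbits &= ~(PG_RW | PG_M);	/* VM_PROT_WRITE not requested */
	pbits |= PG_NX;			/* VM_PROT_EXECUTE not requested */

	/* Nothing changes, so the superpage need not be demoted. */
	assert(pbits == ptpaddr);
	return (0);
}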
git: 3e00c11a4f43 - main - arm64: Support the L3 ATTR_CONTIGUOUS page size in pagesizes[]
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=3e00c11a4f43bf1c7b88d25638e2bfee399e7674 commit 3e00c11a4f43bf1c7b88d25638e2bfee399e7674 Author: Alan Cox AuthorDate: 2024-07-12 07:44:56 + Commit: Alan Cox CommitDate: 2024-07-13 17:43:42 + arm64: Support the L3 ATTR_CONTIGUOUS page size in pagesizes[] Update pagesizes[] to include the L3 ATTR_CONTIGUOUS (L3C) page size, which is 64KB when the base page size is 4KB and 2MB when the base page size is 16KB. Add support for L3C pages to shm_create_largepage(). Add support for creating L3C page mappings to pmap_enter(psind=1). Add support for reporting L3C page mappings to mincore(2) and procstat(8). Update vm_fault_soft_fast() and vm_fault_populate() to handle multiple superpage sizes. Declare arm64 as supporting two superpage reservation sizes, and simulate two superpage reservation sizes, updating the vm_page's psind field to reflect the correct page size from pagesizes[]. (The next patch in this series will replace this simulation. This patch is already big enough.) Co-authored-by: Eliot Solomon Reviewed by:kib Differential Revision: https://reviews.freebsd.org/D45766 --- share/man/man7/arch.7 | 2 +- sys/arm64/arm64/pmap.c | 162 +--- sys/arm64/include/param.h | 2 +- sys/arm64/include/vmparam.h | 25 --- sys/kern/imgact_elf.c | 8 ++- sys/kern/kern_mib.c | 8 ++- sys/kern/kern_proc.c| 12 +++- sys/kern/uipc_shm.c | 15 +++- sys/sys/mman.h | 4 +- sys/vm/vm_domainset.c | 3 + sys/vm/vm_fault.c | 32 ++--- sys/vm/vm_glue.c| 5 +- sys/vm/vm_kern.c| 5 +- sys/vm/vm_map.c | 46 ++--- sys/vm/vm_page.c| 6 +- sys/vm/vm_page.h| 2 +- sys/vm/vm_reserv.c | 104 17 files changed, 344 insertions(+), 97 deletions(-) diff --git a/share/man/man7/arch.7 b/share/man/man7/arch.7 index f3d2e1036706..88228b807e6a 100644 --- a/share/man/man7/arch.7 +++ b/share/man/man7/arch.7 @@ -218,7 +218,7 @@ is 8 bytes on all supported architectures except i386. 
.Ss Page Size .Bl -column -offset indent "Architecture" "Page Sizes" .It Sy Architecture Ta Sy Page Sizes -.It aarch64 Ta 4K, 2M, 1G +.It aarch64 Ta 4K, 64K, 2M, 1G .It amd64 Ta 4K, 2M, 1G .It armv7 Ta 4K, 1M .It i386Ta 4K, 2M (PAE), 4M diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index c3357900e1be..2540b5eaf4b9 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -1631,11 +1631,14 @@ pmap_init(void) if (superpages_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("pmap_init: can't assign to pagesizes[1]")); - pagesizes[1] = L2_SIZE; + pagesizes[1] = L3C_SIZE; + KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0, + ("pmap_init: can't assign to pagesizes[2]")); + pagesizes[2] = L2_SIZE; if (L1_BLOCKS_SUPPORTED) { - KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0, - ("pmap_init: can't assign to pagesizes[2]")); - pagesizes[2] = L1_SIZE; + KASSERT(MAXPAGESIZES > 3 && pagesizes[3] == 0, + ("pmap_init: can't assign to pagesizes[3]")); + pagesizes[3] = L1_SIZE; } } @@ -4959,7 +4962,7 @@ static int pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t pte, int flags, int psind) { - pd_entry_t *l0p, *l1p, *l2p, newpte, origpte; + pd_entry_t *l0p, *l1p, *l2p, *l3p, newpte, origpte, *tl3p; vm_page_t mp; PMAP_LOCK_ASSERT(pmap, MA_OWNED); @@ -4973,9 +4976,11 @@ restart: newpte = pte; if (!pmap_bti_same(pmap, va, va + pagesizes[psind], &newpte)) return (KERN_PROTECTION_FAILURE); - if (psind == 2) { + if (psind == 3) { PMAP_ASSERT_L1_BLOCKS_SUPPORTED; + KASSERT(pagesizes[psind] == L1_SIZE, + ("pagesizes[%d] != L1_SIZE", psind)); l0p = pmap_l0(pmap, va); if ((pmap_load(l0p) & ATTR_DESCR_VALID) == 0) { mp = _pmap_alloc_l3(pmap, pmap_l0_pindex(va), NULL); @@ -5005,7 +5010,9 @@ restart: ("va %#lx changing 1G phys page l1 %#lx newpte %#lx", va, origpte, newpte)); pmap_store(l1p, newpte); - } else /* (psind == 1) */ { + } else if (psind == 2) { +
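For readers mapping psind values to sizes, here is the resulting pagesizes[] layout on arm64 with a 4KB base page size, assuming L1 blocks are supported (an illustrative summary of the pmap_init() hunk above, not code from the commit):

#include <stdio.h>

int
main(void)
{
	const unsigned long pagesizes[] = {
		4UL << 10,	/* psind 0: base (L3) page */
		64UL << 10,	/* psind 1: L3C, 16 contiguous L3 pages */
		2UL << 20,	/* psind 2: L2 block */
		1UL << 30,	/* psind 3: L1 block */
	};

	for (int i = 0; i < 4; i++)
		printf("psind %d: %lu KB\n", i, pagesizes[i] >> 10);
	return (0);
}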
git: 772ae9eddf87 - main - vm ASLR: Handle VM_NRESERV_LEVEL == 0
The branch main has been updated by alc:

URL: https://cgit.FreeBSD.org/src/commit/?id=772ae9eddf87d835279ea6aaaf08b8ca421101ff

commit 772ae9eddf87d835279ea6aaaf08b8ca421101ff
Author:     Alan Cox
AuthorDate: 2024-07-13 19:40:43 +
Commit:     Alan Cox
CommitDate: 2024-07-13 19:40:43 +

    vm ASLR: Handle VM_NRESERV_LEVEL == 0

    Some flavors of powerpc don't enable superpage reservations.

    Fixes: 3e00c11a4f43 ("arm64: Support the L3 ATTR_CONTIGUOUS page ...")
---
 sys/vm/vm_map.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index b9c27e14d1d0..77297a0e3957 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1993,7 +1993,7 @@ out:
 	return (result);
 }
 
-#if VM_NRESERVLEVEL == 1
+#if VM_NRESERVLEVEL <= 1
 static const int aslr_pages_rnd_64[2] = {0x1000, 0x10};
 static const int aslr_pages_rnd_32[2] = {0x100, 0x4};
 #elif VM_NRESERVLEVEL == 2
git: cd836f600418 - main - vm: Retire kmem_arena
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=cd836f600418e892869d23cee857ce1a6cd5b863 commit cd836f600418e892869d23cee857ce1a6cd5b863 Author: Alan Cox AuthorDate: 2024-07-21 19:50:47 + Commit: Alan Cox CommitDate: 2024-07-24 19:31:40 + vm: Retire kmem_arena It has simply been an alias for the kernel_arena for many years now. Enough time has passed to retire it. Any out-of-tree kernel modules that directly use kmem_arena should switch to kernel_arena. Reviewed by:kib, markj Differential Revision: https://reviews.freebsd.org/D46057 --- sys/arm/nvidia/drm2/tegra_bo.c | 4 ++-- sys/kern/subr_vmem.c | 2 -- sys/vm/vm_kern.h | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sys/arm/nvidia/drm2/tegra_bo.c b/sys/arm/nvidia/drm2/tegra_bo.c index c27b9f39c508..08cd3de6a3fe 100644 --- a/sys/arm/nvidia/drm2/tegra_bo.c +++ b/sys/arm/nvidia/drm2/tegra_bo.c @@ -71,7 +71,7 @@ tegra_bo_destruct(struct tegra_bo *bo) vm_object_deallocate(bo->cdev_pager); if (bo->vbase != 0) - vmem_free(kmem_arena, bo->vbase, size); + vmem_free(kernel_arena, bo->vbase, size); } static void @@ -137,7 +137,7 @@ tegra_bo_init_pager(struct tegra_bo *bo) size = round_page(bo->gem_obj.size); bo->pbase = VM_PAGE_TO_PHYS(bo->m[0]); - if (vmem_alloc(kmem_arena, size, M_WAITOK | M_BESTFIT, &bo->vbase)) + if (vmem_alloc(kernel_arena, size, M_WAITOK | M_BESTFIT, &bo->vbase)) return (ENOMEM); VM_OBJECT_WLOCK(bo->cdev_pager); diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c index a706d944dc3f..9288b0935441 100644 --- a/sys/kern/subr_vmem.c +++ b/sys/kern/subr_vmem.c @@ -236,9 +236,7 @@ static uma_zone_t vmem_bt_zone; static struct vmem kernel_arena_storage; static struct vmem buffer_arena_storage; static struct vmem transient_arena_storage; -/* kernel and kmem arenas are aliased for backwards KPI compat. */ vmem_t *kernel_arena = &kernel_arena_storage; -vmem_t *kmem_arena = &kernel_arena_storage; vmem_t *buffer_arena = &buffer_arena_storage; vmem_t *transient_arena = &transient_arena_storage; diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h index 848f28fe90b8..942c03480364 100644 --- a/sys/vm/vm_kern.h +++ b/sys/vm/vm_kern.h @@ -69,7 +69,6 @@ extern struct vm_map exec_map_store; extern struct vm_map pipe_map_store; #definepipe_map(&pipe_map_store) extern struct vmem *kernel_arena; -extern struct vmem *kmem_arena; extern struct vmem *buffer_arena; extern struct vmem *transient_arena; extern struct vmem *memguard_arena;
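For out-of-tree module maintainers, the switch is mechanical; a hypothetical driver fragment (a kernel-code sketch mirroring the tegra_bo.c hunk above, not a drop-in patch) would become:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/vmem.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

static vm_offset_t
mydrv_map_buffer(vm_size_t size)
{
	vm_offset_t va;

	/* Formerly: vmem_alloc(kmem_arena, size, M_WAITOK | M_BESTFIT, &va) */
	if (vmem_alloc(kernel_arena, size, M_WAITOK | M_BESTFIT, &va) != 0)
		return (0);
	return (va);
}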
git: 096dfa338d73 - main - vm: Retire vm_page_alloc_freelist{,_domain}()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=096dfa338d7391cc957dba9cca44ceb7f78cb891 commit 096dfa338d7391cc957dba9cca44ceb7f78cb891 Author: Alan Cox AuthorDate: 2024-07-23 07:14:31 + Commit: Alan Cox CommitDate: 2024-07-24 19:31:40 + vm: Retire vm_page_alloc_freelist{,_domain}() Once upon a time, I created vm_page_alloc_freelist{,_domain}() to support faster allocation of pages that were mapped by the partial direct map on 32-bit MIPS. At the time, I expected that these functions might find other uses too, but those other uses never materialized. So, these functions have not been used for some time now. Instead, people use the more general vm_page_alloc_contig(). Reviewed by:kib, markj Differential Revision: https://reviews.freebsd.org/D46063 --- ObsoleteFiles.inc | 4 share/man/man9/Makefile| 2 -- share/man/man9/vm_page_alloc.9 | 26 + sys/vm/vm_page.c | 51 +- sys/vm/vm_page.h | 2 -- 5 files changed, 11 insertions(+), 74 deletions(-) diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc index f8f0309d6ccf..02a34a2541eb 100644 --- a/ObsoleteFiles.inc +++ b/ObsoleteFiles.inc @@ -51,6 +51,10 @@ # xargs -n1 | sort | uniq -d; # done +# 20240721: retire vm_page_alloc_freelist +OLD_FILES+=usr/share/man/man9/vm_page_alloc_freelist.9.gz +OLD_FILES+=usr/share/man/man9/vm_page_alloc_freelist_domain.9.gz + # 20240716: retire mergemaster OLD_FILES+=usr/sbin/mergemaster OLD_FILES+=usr/share/man/man8/mergemaster.8.gz diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index 9880b7b2f5e4..f7c21ab541b6 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -2384,8 +2384,6 @@ MLINKS+=vm_page_alloc.9 vm_page_alloc_after.9 \ vm_page_alloc.9 vm_page_alloc_contig_domain.9 \ vm_page_alloc.9 vm_page_alloc_domain.9 \ vm_page_alloc.9 vm_page_alloc_domain_after.9 \ - vm_page_alloc.9 vm_page_alloc_freelist.9 \ - vm_page_alloc.9 vm_page_alloc_freelist_domain.9 \ vm_page_alloc.9 vm_page_alloc_noobj.9 \ vm_page_alloc.9 vm_page_alloc_noobj_contig.9 \ vm_page_alloc.9 vm_page_alloc_noobj_contig_domain.9 \ diff --git a/share/man/man9/vm_page_alloc.9 b/share/man/man9/vm_page_alloc.9 index de225e05d707..7d6cf1692bb1 100644 --- a/share/man/man9/vm_page_alloc.9 +++ b/share/man/man9/vm_page_alloc.9 @@ -28,7 +28,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH .\" DAMAGE. .\" -.Dd November 11, 2021 +.Dd July 21, 2024 .Dt VM_PAGE_ALLOC 9 .Os .Sh NAME @@ -87,17 +87,6 @@ .Fa "vm_page_t mpred" .Fc .Ft vm_page_t -.Fo vm_page_alloc_freelist -.Fa "int freelist" -.Fa "int req" -.Fc -.Ft vm_page_t -.Fo vm_page_alloc_freelist_domain -.Fa "int domain" -.Fa "int freelist" -.Fa "int req" -.Fc -.Ft vm_page_t .Fo vm_page_alloc_noobj .Fa "int req" .Fc @@ -212,19 +201,6 @@ or will carry the machine-dependent encoding of the memory attribute. Additionally, the direct mapping of the page, if any, will be updated to reflect the requested memory attribute. -.Pp -The -.Fn vm_page_alloc_freelist -and -.Fn vm_page_alloc_freelist_domain -functions behave identically to -.Fn vm_page_alloc_noobj -and -.Fn vm_page_alloc_noobj_domain , -respectively, except that a successful allocation will return a page from the -specified physical memory freelist. -These functions are not intended for use outside of the virtual memory -subsystem and exist only to support the requirements of certain platforms. 
.Sh REQUEST FLAGS All page allocator functions accept a .Fa req diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index c9ac79330696..64413ba10bfa 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -2406,11 +2406,10 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain, /* * Allocate a physical page that is not intended to be inserted into a VM - * object. If the "freelist" parameter is not equal to VM_NFREELIST, then only - * pages from the specified vm_phys freelist will be returned. + * object. */ -static __always_inline vm_page_t -_vm_page_alloc_noobj_domain(int domain, const int freelist, int req) +vm_page_t +vm_page_alloc_noobj_domain(int domain, int req) { struct vm_domain *vmd; vm_page_t m; @@ -2426,8 +2425,7 @@ _vm_page_alloc_noobj_domain(int domain, const int freelist, int req) flags = (req & VM_ALLOC_NODUMP) != 0 ? PG_NODUMP : 0; vmd = VM_DOMAIN(domain); again: - if (freelist == VM_NFREELIST && - vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone != NULL) { + if (vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone != NULL) { m = uma_zalloc(vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone,
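A hypothetical out-of-tree caller would switch as follows (an illustrative kernel-code sketch; the man page excerpt above documents that the noobj allocators behave identically apart from the freelist argument):

#include <sys/param.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_page.h>

static vm_page_t
mydrv_alloc_scratch_page(void)
{
	/* Formerly: vm_page_alloc_freelist(VM_FREELIST_DEFAULT, req). */
	return (vm_page_alloc_noobj(VM_ALLOC_WAITOK | VM_ALLOC_ZERO));
}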
git: 98dd639c94f7 - main - arm64 pmap: Eliminate an unnecessary conditional
The branch main has been updated by alc:

URL: https://cgit.FreeBSD.org/src/commit/?id=98dd639c94f716858ae29958f484729b1d2fd387

commit 98dd639c94f716858ae29958f484729b1d2fd387
Author:     Alan Cox
AuthorDate: 2024-07-24 22:32:32 +
Commit:     Alan Cox
CommitDate: 2024-07-25 06:46:47 +

    arm64 pmap: Eliminate an unnecessary conditional

    Eliminate an unnecessary test whether a pointer is non-NULL from
    pmap_bti_same().

    Reviewed by:    dougm
---
 sys/arm64/arm64/pmap.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 2540b5eaf4b9..59de6ef37f09 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -9396,8 +9396,7 @@ pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t *pte)
 			return (false);
 		rs = next_rs;
 	}
-	if (rs != NULL)
-		*pte |= ATTR_S1_GP;
+	*pte |= ATTR_S1_GP;
 	return (true);
 }
 
git: 5b8c01d13a09 - main - amd64 pmap: Optimize PKU lookups when creating superpage mappings
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=5b8c01d13a0970b11f47503fcd627d249a6e638a commit 5b8c01d13a0970b11f47503fcd627d249a6e638a Author: Alan Cox AuthorDate: 2024-07-25 06:57:53 + Commit: Alan Cox CommitDate: 2024-07-26 05:38:46 + amd64 pmap: Optimize PKU lookups when creating superpage mappings Modify pmap_pkru_same() to update the prototype PTE at the same time as checking the address range. This eliminates the need for calling pmap_pkru_get() in addition to pmap_pkru_same(). pmap_pkru_same() was already doing most of the work of pmap_pkru_get(). Reviewed by:kib Differential Revision: https://reviews.freebsd.org/D46135 --- sys/amd64/amd64/pmap.c | 29 + 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index dcf9b4f5a4f3..778d07689ff0 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -573,7 +573,8 @@ struct pmap_pkru_range { }; static uma_zone_t pmap_pkru_ranges_zone; -static bool pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva); +static bool pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, +pt_entry_t *pte); static pt_entry_t pmap_pkru_get(pmap_t pmap, vm_offset_t va); static void pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva); static void *pkru_dup_range(void *ctx, void *data); @@ -7071,11 +7072,9 @@ pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, PG_V = pmap_valid_bit(pmap); restart: - if (!pmap_pkru_same(pmap, va, va + pagesizes[psind])) - return (KERN_PROTECTION_FAILURE); pten = newpte; - if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86) - pten |= pmap_pkru_get(pmap, va); + if (!pmap_pkru_same(pmap, va, va + pagesizes[psind], &pten)) + return (KERN_PROTECTION_FAILURE); if (psind == 2) { /* 1G */ pml4e = pmap_pml4e(pmap, va); @@ -7529,14 +7528,10 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, * and let vm_fault() cope. Check after pde allocation, since * it could sleep. */ - if (!pmap_pkru_same(pmap, va, va + NBPDR)) { + if (!pmap_pkru_same(pmap, va, va + NBPDR, &newpde)) { pmap_abort_ptp(pmap, va, pdpg); return (KERN_PROTECTION_FAILURE); } - if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86) { - newpde &= ~X86_PG_PKU_MASK; - newpde |= pmap_pkru_get(pmap, va); - } /* * If there are existing mappings, either abort or remove them. @@ -11460,13 +11455,21 @@ pmap_pkru_deassign_all(pmap_t pmap) rangeset_remove_all(&pmap->pm_pkru); } +/* + * Returns true if the PKU setting is the same across the specified address + * range, and false otherwise. When returning true, updates the referenced PTE + * to reflect the PKU setting. 
+ */ static bool -pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t *pte) { struct pmap_pkru_range *next_ppr, *ppr; vm_offset_t va; + u_int keyidx; PMAP_LOCK_ASSERT(pmap, MA_OWNED); + KASSERT(pmap->pm_type != PT_X86 || (*pte & X86_PG_PKU_MASK) == 0, + ("pte %p has unexpected PKU %ld", pte, *pte & X86_PG_PKU_MASK)); if (pmap->pm_type != PT_X86 || (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 || sva >= VM_MAXUSER_ADDRESS) @@ -11478,14 +11481,16 @@ pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) return (ppr == NULL || ppr->pkru_rs_el.re_start >= eva); } + keyidx = ppr->pkru_keyidx; while ((va = ppr->pkru_rs_el.re_end) < eva) { next_ppr = rangeset_next(&pmap->pm_pkru, va); if (next_ppr == NULL || va != next_ppr->pkru_rs_el.re_start || - ppr->pkru_keyidx != next_ppr->pkru_keyidx) + keyidx != next_ppr->pkru_keyidx) return (false); ppr = next_ppr; } + *pte |= X86_PG_PKU(keyidx); return (true); }
git: f6ed52c1f010 - main - vm: Stop reducing vm_pageout_page_count at startup
The branch main has been updated by alc:

URL: https://cgit.FreeBSD.org/src/commit/?id=f6ed52c1f010aca5083e9c4dd3d0ad15aa8230a2

commit f6ed52c1f010aca5083e9c4dd3d0ad15aa8230a2
Author:     Alan Cox
AuthorDate: 2024-08-01 17:14:00 +
Commit:     Alan Cox
CommitDate: 2024-08-02 19:41:36 +

    vm: Stop reducing vm_pageout_page_count at startup

    Attempting to reduce vm_pageout_page_count at startup when the machine
    has less than 8MB of physical memory is pointless, since we haven't run
    on machines with so little memory in ages.

    Reviewed by:    kib, markj
    Differential Revision:  https://reviews.freebsd.org/D46206
---
 sys/vm/vm_pageout.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 8ad4bf4d3ab4..742e0afbc690 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -2305,9 +2305,6 @@ vm_pageout_init(void)
 	/*
 	 * Initialize some paging parameters.
 	 */
-	if (vm_cnt.v_page_count < 2000)
-		vm_pageout_page_count = 8;
-
 	freecount = 0;
 	for (i = 0; i < vm_ndomains; i++) {
 		struct vm_domain *vmd;
git: f076dd3ef02d - main - imgact_elf: Optimize pagesizes[] loop
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=f076dd3ef02ddf7799eeaab8d405ee9d845e8dc6 commit f076dd3ef02ddf7799eeaab8d405ee9d845e8dc6 Author: Alan Cox AuthorDate: 2024-07-15 06:02:33 + Commit: Alan Cox CommitDate: 2024-08-02 23:26:11 + imgact_elf: Optimize pagesizes[] loop Except for elements whose value is zero, the elements of pagesizes[] are always sorted in increasing order, so once a loop starting from the end of the array has found a non-zero element, it has found the largest valued element and can stop iterating. Reviewed by:kib Differential Revision: https://reviews.freebsd.org/D46215 --- sys/kern/imgact_elf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index a623a63e9c2e..28ffdd03dd6d 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1158,8 +1158,10 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) maxalign = PAGE_SIZE; maxsalign = PAGE_SIZE * 1024; for (i = MAXPAGESIZES - 1; i > 0; i--) { - if (pagesizes[i] > maxsalign) + if (pagesizes[i] > maxsalign) { maxsalign = pagesizes[i]; + break; + } } mapsz = 0;
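The invariant that justifies the early break, checked with sample values (a userspace sketch, not the imgact_elf.c code):

#include <assert.h>
#include <stddef.h>

int
main(void)
{
	/* Non-zero elements ascend; unused trailing slots are zero. */
	const unsigned long pagesizes[] = { 4096, 65536, 2097152, 0 };
	const size_t n = sizeof(pagesizes) / sizeof(pagesizes[0]);
	unsigned long largest = 0;

	for (size_t i = n; i-- > 1;) {
		if (pagesizes[i] != 0) {
			largest = pagesizes[i];
			break;	/* first non-zero from the end is the max */
		}
	}
	assert(largest == 2097152);
	return (0);
}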
git: fa290859fa63 - main - vm: Assert that pagesizes[] is sorted
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=fa290859fa63d65b5da3014038ae289c1b336700 commit fa290859fa63d65b5da3014038ae289c1b336700 Author: Alan Cox AuthorDate: 2024-08-03 20:18:16 + Commit: Alan Cox CommitDate: 2024-08-04 19:53:15 + vm: Assert that pagesizes[] is sorted Ensure that pmap_init() properly initialized pagesizes[]. In part, we are making this change to document the requirement that the non-zero elements of pagesizes[] must be in ascending order. Reviewed by:kib, markj --- sys/vm/vm_init.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index 0fd13f73a180..a0d3651ba266 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -100,6 +100,24 @@ long physmem; static void vm_mem_init(void *); SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_FIRST, vm_mem_init, NULL); +#ifdef INVARIANTS +/* + * Ensure that pmap_init() correctly initialized pagesizes[]. + */ +static void +vm_check_pagesizes(void) +{ + int i; + + KASSERT(pagesizes[0] == PAGE_SIZE, ("pagesizes[0] != PAGE_SIZE")); + for (i = 1; i < MAXPAGESIZES; i++) { + KASSERT((pagesizes[i - 1] != 0 && + pagesizes[i - 1] < pagesizes[i]) || pagesizes[i] == 0, + ("pagesizes[%d ... %d] are misconfigured", i - 1, i)); + } +} +#endif + /* * vm_mem_init() initializes the virtual memory system. * This is done only by the first cpu up. @@ -140,6 +158,10 @@ vm_mem_init(void *dummy) kmem_init_zero_region(); pmap_init(); vm_pager_init(); + +#ifdef INVARIANTS + vm_check_pagesizes(); +#endif } void
git: 841cf52595b6 - main - arm64 pmap: Add ATTR_CONTIGUOUS support [Part 2]
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=841cf52595b6a6b98e266b63e54a7cf6fb6ca73e commit 841cf52595b6a6b98e266b63e54a7cf6fb6ca73e Author: Alan Cox AuthorDate: 2024-04-08 05:05:54 + Commit: Alan Cox CommitDate: 2024-04-09 16:21:08 + arm64 pmap: Add ATTR_CONTIGUOUS support [Part 2] Create ATTR_CONTIGUOUS mappings in pmap_enter_object(). As a result, when the base page size is 4 KB, the read-only data and text sections of large (2 MB+) executables, e.g., clang, can be mapped using 64 KB pages. Similarly, when the base page size is 16 KB, the read-only data section of large executables can be mapped using 2 MB pages. Rename pmap_enter_2mpage(). Given that we have grown support for 16 KB base pages, we should no longer include page sizes that may vary, e.g., 2mpage, in pmap function names. Requested by: andrew Co-authored-by: Eliot Solomon Differential Revision: https://reviews.freebsd.org/D44575 --- sys/arm64/arm64/pmap.c | 252 +++-- 1 file changed, 245 insertions(+), 7 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 258aa141653b..ea7ff18971e4 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -472,6 +472,8 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, vm_page_t m, struct rwlock **lockp); +static int pmap_enter_l3c(pmap_t pmap, vm_offset_t va, pt_entry_t l3e, u_int flags, +vm_page_t m, vm_page_t *ml3p, struct rwlock **lockp); static bool pmap_every_pte_zero(vm_paddr_t pa); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted, bool all_l3e_AF_set); @@ -5177,13 +5179,13 @@ out: } /* - * Tries to create a read- and/or execute-only 2MB page mapping. Returns + * Tries to create a read- and/or execute-only L2 page mapping. Returns * KERN_SUCCESS if the mapping was created. Otherwise, returns an error * value. See pmap_enter_l2() for the possible error values when "no sleep", * "no replace", and "no reclaim" are specified. */ static int -pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, +pmap_enter_l2_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp) { pd_entry_t new_l2; @@ -5233,13 +5235,13 @@ pmap_every_pte_zero(vm_paddr_t pa) } /* - * Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if + * Tries to create the specified L2 page mapping. Returns KERN_SUCCESS if * the mapping was created, and one of KERN_FAILURE, KERN_NO_SPACE, or * KERN_RESOURCE_SHORTAGE otherwise. Returns KERN_FAILURE if - * PMAP_ENTER_NOREPLACE was specified and a 4KB page mapping already exists - * within the 2MB virtual address range starting at the specified virtual + * PMAP_ENTER_NOREPLACE was specified and a base page mapping already exists + * within the L2 virtual address range starting at the specified virtual * address. Returns KERN_NO_SPACE if PMAP_ENTER_NOREPLACE was specified and a - * 2MB page mapping already exists at the specified virtual address. Returns + * L2 page mapping already exists at the specified virtual address. Returns * KERN_RESOURCE_SHORTAGE if either (1) PMAP_ENTER_NOSLEEP was specified and a * page table page allocation failed or (2) PMAP_ENTER_NORECLAIM was specified * and a PV entry allocation failed. 
@@ -5405,6 +5407,235 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, return (KERN_SUCCESS); } +/* + * Tries to create a read- and/or execute-only L3C page mapping. Returns + * KERN_SUCCESS if the mapping was created. Otherwise, returns an error + * value. + */ +static int +pmap_enter_l3c_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *ml3p, +vm_prot_t prot, struct rwlock **lockp) +{ + pt_entry_t l3e; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PMAP_ASSERT_STAGE1(pmap); + KASSERT(ADDR_IS_CANONICAL(va), + ("%s: Address not in canonical form: %lx", __func__, va)); + + l3e = PHYS_TO_PTE(VM_PAGE_TO_PHYS(m)) | ATTR_DEFAULT | + ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) | + ATTR_CONTIGUOUS | L3_PAGE; + l3e |= pmap_pte_bti(pmap, va); + if ((m->oflags & VPO_UNMANAGED) == 0) { + l3e |= ATTR_SW_MANAGED; + l3e &= ~ATTR_AF; + } + if ((prot & VM_PROT_EXECUTE) == 0 || + m->md.pv_memattr == VM_MEMATTR_DEVICE) + l3e |= ATTR_S1_XN; + if (!ADDR_IS_KERNEL(va)) + l3e |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN; + else + l3e |= ATTR_S1_UXN; + if (pmap != k
git: a803837cec6e - main - arm64 pmap: Add ATTR_CONTIGUOUS support [Part 3]
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=a803837cec6e17e04849d59afac7b6431c70cb93 commit a803837cec6e17e04849d59afac7b6431c70cb93 Author: Alan Cox AuthorDate: 2024-04-17 16:39:46 + Commit: Alan Cox CommitDate: 2024-05-08 02:31:14 + arm64 pmap: Add ATTR_CONTIGUOUS support [Part 3] Introduce L3C promotion of base page mappings. When the base page size is 4KB, use ATTR_CONTIGUOUS to promote 16 aligned, contiguous base page mappings to a 64KB mapping. Alternatively, when the base page size is 16KB, use ATTR_CONTIGUOUS to promote 128 aligned, contiguous base page mappings to a 2MB mapping. Given the frequency of L3C counter updates, switch to per-CPU counters to avoid cache line ping ponging. Revise the L3C counter descriptions to reflect the fact that the size of an L3C mapping varies depending on the base page size. Co-authored-by: Eliot Solomon Reviewed by:markj Differential Revision: https://reviews.freebsd.org/D44983 --- sys/arm64/arm64/pmap.c | 168 - 1 file changed, 154 insertions(+), 14 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index ea7ff18971e4..b1a85befa4e1 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -1684,15 +1684,23 @@ SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, &pmap_l2_promotions, 0, "2MB page promotions"); static SYSCTL_NODE(_vm_pmap, OID_AUTO, l3c, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, -"64KB page mapping counters"); +"L3C (64KB/2MB) page mapping counters"); -static u_long pmap_l3c_demotions; -SYSCTL_ULONG(_vm_pmap_l3c, OID_AUTO, demotions, CTLFLAG_RD, -&pmap_l3c_demotions, 0, "64KB page demotions"); +static COUNTER_U64_DEFINE_EARLY(pmap_l3c_demotions); +SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, demotions, CTLFLAG_RD, +&pmap_l3c_demotions, "L3C (64KB/2MB) page demotions"); -static u_long pmap_l3c_mappings; -SYSCTL_ULONG(_vm_pmap_l3c, OID_AUTO, mappings, CTLFLAG_RD, -&pmap_l3c_mappings, 0, "64KB page mappings"); +static COUNTER_U64_DEFINE_EARLY(pmap_l3c_mappings); +SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, mappings, CTLFLAG_RD, +&pmap_l3c_mappings, "L3C (64KB/2MB) page mappings"); + +static COUNTER_U64_DEFINE_EARLY(pmap_l3c_p_failures); +SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, p_failures, CTLFLAG_RD, +&pmap_l3c_p_failures, "L3C (64KB/2MB) page promotion failures"); + +static COUNTER_U64_DEFINE_EARLY(pmap_l3c_promotions); +SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, promotions, CTLFLAG_RD, +&pmap_l3c_promotions, "L3C (64KB/2MB) page promotions"); /* * If the given value for "final_only" is false, then any cached intermediate- @@ -4547,7 +4555,7 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *ptep, pd_entry_t newpte, * be cached, so we invalidate intermediate entries as well as final * entries. */ - pmap_s1_invalidate_range(pmap, va, va + size, false); + pmap_s1_invalidate_range(pmap, va, va + size, size == L3C_SIZE); /* Create the new mapping */ for (lip = ptep; lip < ptep_end; lip++) { @@ -4749,6 +4757,131 @@ setl3: pmap); return (true); } + +/* + * Tries to promote an aligned, contiguous set of base page mappings to a + * single L3C page mapping. For promotion to occur, two conditions must be + * met: (1) the base page mappings must map aligned, contiguous physical + * memory and (2) the base page mappings must have identical characteristics + * except for the accessed flag. 
+ */ +static bool +pmap_promote_l3c(pmap_t pmap, pd_entry_t *l3p, vm_offset_t va) +{ + pd_entry_t all_l3e_AF, firstl3c, *l3, oldl3, pa; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* +* Currently, this function only supports promotion on stage 1 pmaps +* because it tests stage 1 specific fields and performs a break- +* before-make sequence that is incorrect for stage 2 pmaps. +*/ + if (pmap->pm_stage != PM_STAGE1 || !pmap_ps_enabled(pmap)) + return (false); + + /* +* Compute the address of the first L3 entry in the superpage +* candidate. +*/ + l3p = (pt_entry_t *)((uintptr_t)l3p & ~((L3C_ENTRIES * + sizeof(pt_entry_t)) - 1)); + + firstl3c = pmap_load(l3p); + + /* +* Examine the first L3 entry. Abort if this L3E is ineligible for +* promotion... +*/ + if ((firstl3c & ATTR_SW_NO_PROMOTE) != 0) + return (false); + /* ...is not properly aligned... */ + if ((PTE_TO_PHYS(firstl3c) & L3C_OFFSET) != 0 || + (firstl3c & ATTR_DESCR_MASK) != L3_PAGE) { /* ...or is invalid. */ + count
git: 94b09d388b81 - main - arm64: map kernel using large pages when page size is 16K
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=94b09d388b81eb724769e506cdf0f51bba9b73fb commit 94b09d388b81eb724769e506cdf0f51bba9b73fb Author: Alan Cox AuthorDate: 2024-05-11 06:09:39 + Commit: Alan Cox CommitDate: 2024-05-12 23:22:38 + arm64: map kernel using large pages when page size is 16K When the page size is 16K, use ATTR_CONTIGUOUS to map the kernel code and data sections using 2M pages. Previously, they were mapped using 16K pages. Reviewed by:markj Tested by: markj Differential Revision: https://reviews.freebsd.org/D45162 --- sys/arm64/arm64/locore.S | 26 +++--- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S index f53cd365de55..fffebe8f2b02 100644 --- a/sys/arm64/arm64/locore.S +++ b/sys/arm64/arm64/locore.S @@ -516,11 +516,10 @@ booti_no_fdt: common: #if PAGE_SIZE != PAGE_SIZE_4K /* -* Create L3 pages. The kernel will be loaded at a 2M aligned -* address, however L2 blocks are too large when the page size is -* not 4k to map the kernel with such an aligned address. However, -* when the page size is larger than 4k, L2 blocks are too large to -* map the kernel with such an alignment. +* Create L3 and L3C pages. The kernel will be loaded at a 2M aligned +* address, enabling the creation of L3C pages. However, when the page +* size is larger than 4k, L2 blocks are too large to map the kernel +* with 2M alignment. */ #definePTE_SHIFT L3_SHIFT #defineBUILD_PTE_FUNC build_l3_page_pagetable @@ -784,13 +783,17 @@ LENTRY(link_l2_pagetable) LEND(link_l2_pagetable) /* - * Builds count level 3 page table entries + * Builds count level 3 page table entries. Uses ATTR_CONTIGUOUS to create + * large page (L3C) mappings when the current VA and remaining count allow + * it. * x6 = L3 table * x7 = Block attributes * x8 = VA start * x9 = PA start (trashed) * x10 = Entry count (trashed) * x11, x12 and x13 are trashed + * + * VA start (x8) modulo L3C_SIZE must equal PA start (x9) modulo L3C_SIZE. */ LENTRY(build_l3_page_pagetable) /* @@ -811,8 +814,17 @@ LENTRY(build_l3_page_pagetable) /* Only use the output address bits */ lsr x9, x9, #L3_SHIFT + /* Check if an ATTR_CONTIGUOUS mapping is possible */ +1: tst x11, #(L3C_ENTRIES - 1) + b.ne2f + cmp x10, #L3C_ENTRIES + b.lo3f + orr x12, x12, #(ATTR_CONTIGUOUS) + b 2f +3: and x12, x12, #(~ATTR_CONTIGUOUS) + /* Set the physical address for this virtual address */ -1: orr x13, x12, x9, lsl #L3_SHIFT +2: orr x13, x12, x9, lsl #L3_SHIFT /* Store the entry */ str x13, [x6, x11, lsl #3]
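For readers more comfortable with C than with the assembly above, the per-group decision reduces to the following (an illustrative rendering only, assuming 16KB granules so an L3C run is 128 entries; entries inside a group inherit the decision made at the group's first entry):

#include <stdbool.h>
#include <stdint.h>

#define	L3C_ENTRIES	128	/* 16KB granules: 128 x 16KB = 2MB */

/*
 * Set ATTR_CONTIGUOUS only when the current table index starts an
 * L3C-aligned group and enough entries remain to fill the whole group.
 */
bool
use_contiguous(uint64_t entry_index, uint64_t entries_left)
{
	return ((entry_index & (L3C_ENTRIES - 1)) == 0 &&
	    entries_left >= L3C_ENTRIES);
}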
git: 4f77144279f2 - main - arm64 pmap: eliminate a redundant variable
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=4f77144279f210ce65d77c13470c6363c3ce3c57 commit 4f77144279f210ce65d77c13470c6363c3ce3c57 Author: Alan Cox AuthorDate: 2024-05-19 19:22:53 + Commit: Alan Cox CommitDate: 2024-05-19 19:33:19 + arm64 pmap: eliminate a redundant variable Moreover, if we attempt an L2 promotion on the kernel pmap from pmap_enter_quick_locked(), this change eliminates the recomputation of the L2 entry's address. MFC after: 1 week --- sys/arm64/arm64/pmap.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index a6056a5edfc2..269513589d78 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -5848,7 +5848,6 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { - pd_entry_t *pde; pt_entry_t *l1, *l2, *l3, l3_val; vm_paddr_t pa; int lvl; @@ -5913,13 +5912,13 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; - pde = pmap_pde(kernel_pmap, va, &lvl); - KASSERT(pde != NULL, + l2 = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(l2 != NULL, ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_enter_quick_locked: Invalid level %d", lvl)); - l3 = pmap_l2_to_l3(pde, va); + l3 = pmap_l2_to_l3(l2, va); } /*
git: 9fc5e3fb39ca - main - arm64: set ATTR_CONTIGUOUS on the DMAP's L2 blocks
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=9fc5e3fb39ca5b2239066b750bea2ce5775bd79b commit 9fc5e3fb39ca5b2239066b750bea2ce5775bd79b Author: Alan Cox AuthorDate: 2024-05-13 06:39:28 + Commit: Alan Cox CommitDate: 2024-05-23 03:09:43 + arm64: set ATTR_CONTIGUOUS on the DMAP's L2 blocks On systems configured with 16KB pages, this change creates 1GB page mappings in the direct map where possible. Previously, the largest page size that was used to implement the direct map was 32MB. Similarly, on systems configured with 4KB pages, this change creates 32MB page mappings, instead of 2MB, in the direct map where 1GB is too large. Implement demotion on L2C (32MB/1GB) page mappings within the DMAP. Update sysctl vm.pmap.kernel_maps to report on L2C page mappings. Reviewed by:markj Tested by: gallatin, Eliot Solomon Differential Revision: https://reviews.freebsd.org/D45224 --- sys/arm64/arm64/pmap.c | 264 ++-- sys/arm64/include/pte.h | 5 + 2 files changed, 237 insertions(+), 32 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 269513589d78..2ce313de36cf 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -429,7 +429,6 @@ void (*pmap_stage2_invalidate_all)(uint64_t); #defineTLBI_VA_SHIFT 12 #defineTLBI_VA_MASK((1ul << 44) - 1) #defineTLBI_VA(addr) (((addr) >> TLBI_VA_SHIFT) & TLBI_VA_MASK) -#defineTLBI_VA_L3_INCR (L3_SIZE >> TLBI_VA_SHIFT) static int __read_frequently superpages_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, @@ -470,6 +469,7 @@ static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va); static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, struct rwlock **lockp); static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); +static bool pmap_demote_l2c(pmap_t pmap, pt_entry_t *l2p, vm_offset_t va); static bool pmap_demote_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); @@ -1108,6 +1108,7 @@ pmap_bootstrap_l2_table(struct pmap_bootstrap_state *state) static void pmap_bootstrap_l2_block(struct pmap_bootstrap_state *state, int i) { + pt_entry_t contig; u_int l2_slot; bool first; @@ -1118,7 +1119,7 @@ pmap_bootstrap_l2_block(struct pmap_bootstrap_state *state, int i) pmap_bootstrap_l1_table(state); MPASS((state->va & L2_OFFSET) == 0); - for (first = true; + for (first = true, contig = 0; state->va < DMAP_MAX_ADDRESS && (physmap[i + 1] - state->pa) >= L2_SIZE; state->va += L2_SIZE, state->pa += L2_SIZE) { @@ -1129,13 +1130,27 @@ pmap_bootstrap_l2_block(struct pmap_bootstrap_state *state, int i) if (!first && (state->pa & L1_OFFSET) == 0) break; + /* +* If we have an aligned, contiguous chunk of L2C_ENTRIES +* L2 blocks, set the contiguous bit within each PTE so that +* the chunk can be cached using only one TLB entry. 
+*/ + if ((state->pa & L2C_OFFSET) == 0) { + if (state->va + L2C_SIZE < DMAP_MAX_ADDRESS && + physmap[i + 1] - state->pa >= L2C_SIZE) { + contig = ATTR_CONTIGUOUS; + } else { + contig = 0; + } + } + first = false; l2_slot = pmap_l2_index(state->va); MPASS((state->pa & L2_OFFSET) == 0); MPASS(state->l2[l2_slot] == 0); pmap_store(&state->l2[l2_slot], PHYS_TO_PTE(state->pa) | ATTR_DEFAULT | ATTR_S1_XN | ATTR_KERN_GP | - ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | L2_BLOCK); + ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | contig | L2_BLOCK); } MPASS(state->va == (state->pa - dmap_phys_base + DMAP_MIN_ADDRESS)); } @@ -1667,6 +1682,20 @@ pmap_init(void) vm_initialized = 1; } +static SYSCTL_NODE(_vm_pmap, OID_AUTO, l1, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, +"L1 (1GB/64GB) page mapping counters"); + +static COUNTER_U64_DEFINE_EARLY(pmap_l1_demotions); +SYSCTL_COUNTER_U64(_vm_pmap_l1, OID_AUTO, demotions, CTLFLAG_RD, +&pmap_l1_demotions, "L1 (1GB/64GB) page demotions"); + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2c, CTLFLAG_RD
git: 3dc2a8848986 - main - arm64 pmap: Convert panic()s to KASSERT()s
The branch main has been updated by alc:

URL: https://cgit.FreeBSD.org/src/commit/?id=3dc2a8848986df2c10ae7df4ce87a1538f549a85

commit 3dc2a8848986df2c10ae7df4ce87a1538f549a85
Author:     Alan Cox
AuthorDate: 2024-05-31 17:22:14 +
Commit:     Alan Cox
CommitDate: 2024-05-31 21:54:27 +

    arm64 pmap: Convert panic()s to KASSERT()s

    There is no reason for the ATTR_SW_NO_PROMOTE checks in
    pmap_update_{entry,strided}() to be panic()s instead of KASSERT()s.

    Requested by:   markj
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D45424
---
 sys/arm64/arm64/pmap.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index cd7837e58380..aaba6ca189a1 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -4565,9 +4565,8 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *ptep, pd_entry_t newpte,
 	register_t intr;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
-	if ((newpte & ATTR_SW_NO_PROMOTE) != 0)
-		panic("%s: Updating non-promote pte", __func__);
+	KASSERT((newpte & ATTR_SW_NO_PROMOTE) == 0,
+	    ("%s: Updating non-promote pte", __func__));
 
 	/*
 	 * Ensure we don't get switched out with the page table in an
@@ -4608,9 +4607,8 @@ pmap_update_strided(pmap_t pmap, pd_entry_t *ptep, pd_entry_t *ptep_end,
 	register_t intr;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
-	if ((newpte & ATTR_SW_NO_PROMOTE) != 0)
-		panic("%s: Updating non-promote pte", __func__);
+	KASSERT((newpte & ATTR_SW_NO_PROMOTE) == 0,
+	    ("%s: Updating non-promote pte", __func__));
 
 	/*
 	 * Ensure we don't get switched out with the page table in an
git: f1d73aacdc47 - main - pmap: Skip some superpage promotion attempts that will fail
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=f1d73aacdc47529310e2302094685295c032e28f commit f1d73aacdc47529310e2302094685295c032e28f Author: Alan Cox AuthorDate: 2024-06-02 08:56:47 + Commit: Alan Cox CommitDate: 2024-06-04 05:38:05 + pmap: Skip some superpage promotion attempts that will fail Implement a simple heuristic to skip pointless promotion attempts by pmap_enter_quick_locked() and moea64_enter(). Specifically, when vm_fault() calls pmap_enter_quick() to map neighboring pages at the end of a copy-on-write fault, there is no point in attempting promotion in pmap_enter_quick_locked() and moea64_enter(). Promotion will fail because the base pages have differing protection. Reviewed by:kib Differential Revision: https://reviews.freebsd.org/D45431 MFC after: 1 week --- sys/amd64/amd64/pmap.c | 3 ++- sys/arm64/arm64/pmap.c | 3 ++- sys/i386/i386/pmap.c| 3 ++- sys/powerpc/aim/mmu_oea64.c | 9 +++-- sys/riscv/riscv/pmap.c | 3 ++- sys/vm/vm.h | 1 + sys/vm/vm_fault.c | 11 ++- 7 files changed, 26 insertions(+), 7 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 8105c9d92478..2f3119aede67 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -7818,7 +7818,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, * If both the PTP and the reservation are fully populated, then * attempt promotion. */ - if ((mpte == NULL || mpte->ref_count == NPTEPG) && + if ((prot & VM_PROT_NO_PROMOTE) == 0 && + (mpte == NULL || mpte->ref_count == NPTEPG) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) { if (pde == NULL) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index aaba6ca189a1..b6bc113ba8a4 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -6052,7 +6052,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, * If both the PTP and the reservation are fully populated, then * attempt promotion. */ - if ((mpte == NULL || mpte->ref_count == NL3PG) && + if ((prot & VM_PROT_NO_PROMOTE) == 0 && + (mpte == NULL || mpte->ref_count == NL3PG) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) { if (l2 == NULL) diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 40d8ceaf42b9..5808c31a99af 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -4250,7 +4250,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, * If both the PTP and the reservation are fully populated, then * attempt promotion. */ - if ((mpte == NULL || mpte->ref_count == NPTEPG) && + if ((prot & VM_PROT_NO_PROMOTE) == 0 && + (mpte == NULL || mpte->ref_count == NPTEPG) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) { if (pde == NULL) diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 391f90bb04eb..273dc38214e2 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -1755,10 +1755,14 @@ out: * If the VA of the entered page is not aligned with its PA, * don't try page promotion as it is not possible. * This reduces the number of promotion failures dramatically. +* +* Ignore VM_PROT_NO_PROMOTE unless PMAP_ENTER_QUICK_LOCKED. 
*/ if (moea64_ps_enabled(pmap) && pmap != kernel_pmap && pvo != NULL && (pvo->pvo_vaddr & PVO_MANAGED) != 0 && (va & HPT_SP_MASK) == (pa & HPT_SP_MASK) && + ((prot & VM_PROT_NO_PROMOTE) == 0 || + (flags & PMAP_ENTER_QUICK_LOCKED) == 0) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) moea64_sp_promote(pmap, va, m); @@ -1850,8 +1854,9 @@ moea64_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { - moea64_enter(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), - PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); + moea64_enter(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE | + VM_PROT_NO_PROMOTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, + 0); } vm_paddr_t diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 1e507f62696e..e8504bcb0f59 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -3519,7 +3519,8 @@ pmap_ente
git: 41dfea24eec2 - main - arm64 pmap: Enable L3C promotions by pmap_enter_quick()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=41dfea24eec242e1e083e2a879483a7c05c7e2ff commit 41dfea24eec242e1e083e2a879483a7c05c7e2ff Author: Alan Cox AuthorDate: 2024-06-01 18:17:52 + Commit: Alan Cox CommitDate: 2024-06-05 04:25:51 + arm64 pmap: Enable L3C promotions by pmap_enter_quick() More precisely, implement L3C (64KB/2MB, depending on base page size) promotion in pmap_enter_quick()'s helper function, pmap_enter_quick_locked(). At the same time, use the recently introduced flag VM_PROT_NO_PROMOTE from pmap_enter_object() to pmap_enter_quick_locked() to avoid L3C promotion attempts that will fail. Reviewed by:kib Differential Revision: https://reviews.freebsd.org/D45445 --- sys/arm64/arm64/pmap.c | 29 +++-- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 03d0a1cc6676..8ac7b8f6a135 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -5883,9 +5883,19 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, ((rv = pmap_enter_l3c_rx(pmap, va, m, &mpte, prot, &lock)) == KERN_SUCCESS || rv == KERN_NO_SPACE)) m = &m[L3C_ENTRIES - 1]; - else - mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, - &lock); + else { + /* +* In general, if a superpage mapping were possible, +* it would have been created above. That said, if +* start and end are not superpage aligned, then +* promotion might be possible at the ends of [start, +* end). However, in practice, those promotion +* attempts are so unlikely to succeed that they are +* not worth trying. +*/ + mpte = pmap_enter_quick_locked(pmap, va, m, prot | + VM_PROT_NO_PROMOTE, mpte, &lock); + } m = TAILQ_NEXT(m, listq); } if (lock != NULL) @@ -6048,12 +6058,19 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, #if VM_NRESERVLEVEL > 0 /* -* If both the PTP and the reservation are fully populated, then -* attempt promotion. +* First, attempt L3C promotion, if the virtual and physical addresses +* are aligned with each other and an underlying reservation has the +* neighboring L3 pages allocated. The first condition is simply an +* optimization that recognizes some eventual promotion failures early +* at a lower run-time cost. Then, attempt L2 promotion, if both the +* PTP and the reservation are fully populated. */ if ((prot & VM_PROT_NO_PROMOTE) == 0 && - (mpte == NULL || mpte->ref_count == NL3PG) && + (va & L3C_OFFSET) == (pa & L3C_OFFSET) && (m->flags & PG_FICTITIOUS) == 0 && + vm_reserv_is_populated(m, L3C_ENTRIES) && + pmap_promote_l3c(pmap, l3, va) && + (mpte == NULL || mpte->ref_count == NL3PG) && vm_reserv_level_iffullpop(m) == 0) { if (l2 == NULL) l2 = pmap_pde(pmap, va, &lvl);
git: 60847070f908 - main - vm: Eliminate a redundant call to vm_reserv_break_all()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=60847070f908c7c5ebb2ea4c851f8b98680fd01a commit 60847070f908c7c5ebb2ea4c851f8b98680fd01a Author: Alan Cox AuthorDate: 2024-06-05 06:40:20 + Commit: Alan Cox CommitDate: 2024-06-05 17:39:47 + vm: Eliminate a redundant call to vm_reserv_break_all() When vm_object_collapse() was changed in commit 98087a0 to call vm_object_terminate(), rather than destroying the object directly, its call to vm_reserv_break_all() should have been removed, as vm_object_terminate() calls vm_reserv_break_all(). Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D45495 --- sys/vm/vm_object.c | 8 1 file changed, 8 deletions(-) diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 905df5454355..0af4402938ba 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1953,14 +1953,6 @@ vm_object_collapse(vm_object_t object) */ vm_object_collapse_scan(object); -#if VM_NRESERVLEVEL > 0 - /* -* Break any reservations from backing_object. -*/ - if (__predict_false(!LIST_EMPTY(&backing_object->rvq))) - vm_reserv_break_all(backing_object); -#endif - /* * Move the pager from backing_object to object. *
git: 9fabf97682ce - main - arm64: fix free queue and reservation configuration for 16KB pages
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=9fabf97682ce494865c8b26c218f2d00a36c99ea commit 9fabf97682ce494865c8b26c218f2d00a36c99ea Author: Eliot Solomon AuthorDate: 2023-11-18 21:13:21 + Commit: Alan Cox CommitDate: 2024-03-24 17:22:20 + arm64: fix free queue and reservation configuration for 16KB pages Correctly configure the free page queues and the reservation size when the base page size is 16KB. In particular, the reservation size was less than the L2 Block size, making L2 promotions and mappings all but impossible. Reviewed by:markj Tested by: gallatin Differential Revision: https://reviews.freebsd.org/D42737 --- sys/arm64/arm64/copyinout.S | 1 + sys/arm64/include/vmparam.h | 18 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/sys/arm64/arm64/copyinout.S b/sys/arm64/arm64/copyinout.S index 005fa61bfe82..23f56ae85daa 100644 --- a/sys/arm64/arm64/copyinout.S +++ b/sys/arm64/arm64/copyinout.S @@ -30,6 +30,7 @@ #include #include +#include #include #include "assym.inc" diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h index 0967d3c0aedf..d5d4a5691f37 100644 --- a/sys/arm64/include/vmparam.h +++ b/sys/arm64/include/vmparam.h @@ -99,8 +99,17 @@ * are used by UMA, the physical memory allocator reduces the likelihood of * both 2MB page TLB misses and cache misses during the page table walk when * a 2MB page TLB miss does occur. + * + * When PAGE_SIZE is 16KB, an allocation size of 32MB is supported. This + * size is used by level 0 reservations and L2 BLOCK mappings. */ +#if PAGE_SIZE == PAGE_SIZE_4K #defineVM_NFREEORDER 13 +#elif PAGE_SIZE == PAGE_SIZE_16K +#defineVM_NFREEORDER 12 +#else +#error Unsupported page size +#endif /* * Enable superpage reservations: 1 level. @@ -110,10 +119,17 @@ #endif /* - * Level 0 reservations consist of 512 pages. + * Level 0 reservations consist of 512 pages when PAGE_SIZE is 4KB, and + * 2048 pages when PAGE_SIZE is 16KB. */ #ifndefVM_LEVEL_0_ORDER +#if PAGE_SIZE == PAGE_SIZE_4K #defineVM_LEVEL_0_ORDER9 +#elif PAGE_SIZE == PAGE_SIZE_16K +#defineVM_LEVEL_0_ORDER11 +#else +#error Unsupported page size +#endif #endif /**
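The constants above follow from simple arithmetic: a level 0 reservation holds 2^VM_LEVEL_0_ORDER base pages and must reach the L2 block size for L2 promotions to be possible. The sketch below works the numbers for both granules; it is plain user-space C, not kernel code.

/*
 * Worked arithmetic behind the new constants: the reservation size
 * (2^VM_LEVEL_0_ORDER base pages) must match the L2 block size, which is
 * 2MB with a 4KB granule and 32MB with a 16KB granule.
 */
#include <stdio.h>

int
main(void)
{
	/* 4KB granule: 2^9 pages * 4KB = 2MB, L2 block = 2^(12+9) = 2MB. */
	printf("4KB : reservation %llu MB, L2 block %llu MB\n",
	    ((1ULL << 9) * 4096) >> 20, (1ULL << (12 + 9)) >> 20);
	/* 16KB granule: 2^11 pages * 16KB = 32MB, L2 block = 2^(14+11) = 32MB. */
	printf("16KB: reservation %llu MB, L2 block %llu MB\n",
	    ((1ULL << 11) * 16384) >> 20, (1ULL << (14 + 11)) >> 20);
	return (0);
}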
git: fd6cb031f577 - main - arm64 pmap: Add ATTR_CONTIGUOUS support [Part 1]
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=fd6cb031f577a449894e73daa8f6bd309ba27c73 commit fd6cb031f577a449894e73daa8f6bd309ba27c73 Author: Eliot Solomon AuthorDate: 2024-03-24 19:01:47 + Commit: Alan Cox CommitDate: 2024-03-30 18:37:17 + arm64 pmap: Add ATTR_CONTIGUOUS support [Part 1] The ATTR_CONTIGUOUS bit within an L3 page table entry designates that L3 page as being part of an aligned, physically contiguous collection of L3 pages. For example, 16 aligned, physically contiguous 4 KB pages can form a 64 KB superpage, occupying a single TLB entry. While this change only creates ATTR_CONTIGUOUS mappings in a few places, specifically, the direct map and pmap_kenter{,_device}(), it adds all of the necessary code for handling them once they exist, including demotion, protection, and removal. Consequently, new ATTR_CONTIGUOUS usage can be added (and tested) incrementally. Modify the implementation of sysctl vm.pmap.kernel_maps so that it correctly reports the number of ATTR_CONTIGUOUS mappings on machines configured to use a 16 KB base page size, where an ATTR_CONTIGUOUS mapping consists of 128 base pages. Additionally, this change adds support for creating L2 superpage mappings to pmap_kenter{,_device}(). Reviewed by:markj Tested by: gallatin Differential Revision: https://reviews.freebsd.org/D42737 --- sys/arm64/arm64/pmap.c | 767 +--- sys/arm64/include/pte.h | 21 ++ 2 files changed, 740 insertions(+), 48 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index ba72f1dac8d0..12e5e1d73b38 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -461,18 +461,33 @@ static bool pmap_activate_int(pmap_t pmap); static void pmap_alloc_asid(pmap_t pmap); static int pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, int mode, bool skip_unmapped); +static bool pmap_copy_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va, +pt_entry_t l3e, vm_page_t ml3, struct rwlock **lockp); static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va); static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, struct rwlock **lockp); static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); +static bool pmap_demote_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, vm_page_t m, struct rwlock **lockp); +static bool pmap_every_pte_zero(vm_paddr_t pa); +static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted, +bool all_l3e_AF_set); +static pt_entry_t pmap_load_l3c(pt_entry_t *l3p); +static void pmap_mask_set_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va, +vm_offset_t *vap, vm_offset_t va_next, pt_entry_t mask, pt_entry_t nbits); +static bool pmap_pv_insert_l3c(pmap_t pmap, vm_offset_t va, vm_page_t m, +struct rwlock **lockp); +static void pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pd_entry_t l1e, struct spglist *free, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp); +static bool pmap_remove_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va, +vm_offset_t *vap, vm_offset_t va_next, vm_page_t ml3, struct spglist 
*free, +struct rwlock **lockp); static void pmap_reset_asid_set(pmap_t pmap); static bool pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); @@ -483,6 +498,8 @@ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); +static void pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, +vm_offset_t va, vm_size_t size); static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); static uma_zone_t pmap_bti_ranges_zone; @@ -1121,19 +1138,20 @@ pmap_bootstrap_l2_block(struct pmap_bootstrap_state *state, int i) static void pmap_bootstrap_l3_page(struct pmap_bootstrap_state *state, int i) { + pt_entry_t contig; u_int l3_slot; bool first; - if ((physmap[i + 1] - state->pa) < L3_SIZE) + if (physmap[i + 1] - state->pa < L3_SIZE) return; /* Make sure there is a valid L2 table */ pmap_bootstrap_l2_table(state); MPASS((state->va &a
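The shape of an ATTR_CONTIGUOUS candidate can be stated outside the kernel: an L3C-aligned run of L3C_ENTRIES pages backed by aligned, physically contiguous memory. The sketch below is a user-space illustration with assumed 4KB-granule constants (16 pages, 64KB); it is not the pmap's implementation.

/*
 * Illustrative eligibility test for an ATTR_CONTIGUOUS run: the virtual
 * address must be L3C-aligned and the backing pages must form an aligned,
 * physically contiguous block.  Constants assume the 4KB configuration.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_BYTES   4096
#define L3C_ENTRIES  16                   /* 64KB with 4KB pages */
#define L3C_BYTES    (L3C_ENTRIES * PAGE_BYTES)

static bool
l3c_eligible(uint64_t va, const uint64_t *pa, int npages)
{
	if (npages != L3C_ENTRIES || (va & (L3C_BYTES - 1)) != 0 ||
	    (pa[0] & (L3C_BYTES - 1)) != 0)
		return (false);
	for (int i = 1; i < npages; i++)  /* physical contiguity */
		if (pa[i] != pa[0] + (uint64_t)i * PAGE_BYTES)
			return (false);
	return (true);
}

int
main(void)
{
	uint64_t pa[L3C_ENTRIES];

	for (int i = 0; i < L3C_ENTRIES; i++)
		pa[i] = 0x80000000ULL + (uint64_t)i * PAGE_BYTES;
	printf("%d\n", l3c_eligible(0x10000, pa, L3C_ENTRIES));  /* 1 */
	return (0);
}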
git: e0388a906ca7 - main - arm64: enable superpage mappings by pmap_mapdev{,_attr}()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=e0388a906ca77d07c99e8762d47dccaaaefd8bab commit e0388a906ca77d07c99e8762d47dccaaaefd8bab Author: Alan Cox AuthorDate: 2024-03-30 20:35:32 + Commit: Alan Cox CommitDate: 2024-03-30 20:41:30 + arm64: enable superpage mappings by pmap_mapdev{,_attr}() In order for pmap_kenter{,_device}() to create superpage mappings, either 64 KB or 2 MB, pmap_mapdev{,_attr}() must request appropriately aligned virtual addresses. Reviewed by:markj Tested by: gallatin Differential Revision: https://reviews.freebsd.org/D42737 --- sys/kern/subr_devmap.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/sys/kern/subr_devmap.c b/sys/kern/subr_devmap.c index 5976f16c7577..441ffeb1270a 100644 --- a/sys/kern/subr_devmap.c +++ b/sys/kern/subr_devmap.c @@ -273,6 +273,13 @@ pmap_mapdev(vm_paddr_t pa, vm_size_t size) KASSERT(va >= VM_MAX_KERNEL_ADDRESS - PMAP_MAPDEV_EARLY_SIZE, ("Too many early devmap mappings")); } else +#endif +#ifdef __aarch64__ + if (size >= L2_SIZE && (pa & L2_OFFSET) == 0) + va = kva_alloc_aligned(size, L2_SIZE); + else if (size >= L3C_SIZE && (pa & L3C_OFFSET) == 0) + va = kva_alloc_aligned(size, L3C_SIZE); + else #endif va = kva_alloc(size); if (!va) @@ -304,6 +311,13 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) KASSERT(va >= (VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE)), ("Too many early devmap mappings 2")); } else +#ifdef __aarch64__ + if (size >= L2_SIZE && (pa & L2_OFFSET) == 0) + va = kva_alloc_aligned(size, L2_SIZE); + else if (size >= L3C_SIZE && (pa & L3C_OFFSET) == 0) + va = kva_alloc_aligned(size, L3C_SIZE); + else +#endif va = kva_alloc(size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
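The policy added here is small: request KVA whose alignment matches the physical range whenever a block or contiguous mapping could be used later. Below is a user-space sketch with assumed 4KB-granule sizes; kva_alloc_aligned() itself is not modeled.

/*
 * Illustrative sketch of the alignment policy: prefer KVA aligned to the
 * largest mapping size that both the request size and the physical address
 * allow.
 */
#include <stdint.h>
#include <stdio.h>

#define L2_SIZE     (2ULL * 1024 * 1024)  /* 2MB block */
#define L2_OFFSET   (L2_SIZE - 1)
#define L3C_SIZE    (64ULL * 1024)        /* 64KB contiguous run */
#define L3C_OFFSET  (L3C_SIZE - 1)

static uint64_t
devmap_alignment(uint64_t pa, uint64_t size)
{
	if (size >= L2_SIZE && (pa & L2_OFFSET) == 0)
		return (L2_SIZE);
	if (size >= L3C_SIZE && (pa & L3C_OFFSET) == 0)
		return (L3C_SIZE);
	return (0);                       /* plain kva_alloc(), no extra alignment */
}

int
main(void)
{
	/* 4MB region on a 2MB boundary: ask for 2MB-aligned KVA. */
	printf("%llu\n", (unsigned long long)devmap_alignment(0x10200000, 4 << 20));
	/* 128KB region on a 64KB boundary: ask for 64KB-aligned KVA. */
	printf("%llu\n", (unsigned long long)devmap_alignment(0x10210000, 128 << 10));
	return (0);
}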
git: 22c098843127 - main - arm64: correctly handle a failed BTI check in pmap_enter_l2()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=22c098843127f6a31e25e94b07b35677f038f6d6 commit 22c098843127f6a31e25e94b07b35677f038f6d6 Author: Alan Cox AuthorDate: 2024-04-03 05:21:08 + Commit: Alan Cox CommitDate: 2024-04-03 16:19:30 + arm64: correctly handle a failed BTI check in pmap_enter_l2() If pmap_enter_l2() does not create a mapping because the BTI check fails, then we should release the reference on the page table page acquired from pmap_alloc_l2(). Otherwise, the page table page will never be reclaimed. --- sys/arm64/arm64/pmap.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 12e5e1d73b38..258aa141653b 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -5269,8 +5269,11 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, * and let vm_fault() cope. Check after l2 allocation, since * it could sleep. */ - if (!pmap_bti_same(pmap, va, va + L2_SIZE)) + if (!pmap_bti_same(pmap, va, va + L2_SIZE)) { + KASSERT(l2pg != NULL, ("pmap_enter_l2: missing L2 PTP")); + pmap_abort_ptp(pmap, va, l2pg); return (KERN_PROTECTION_FAILURE); + } /* * If there are existing mappings, either abort or remove them.
git: 7beeacb27b27 - main - Honor the vm page's PG_NODUMP flag on arm and i386.
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=7beeacb27b2792dfdab9c806c00d50b6ac9fc34b commit 7beeacb27b2792dfdab9c806c00d50b6ac9fc34b Author: Alan Cox AuthorDate: 2021-01-04 21:59:05 + Commit: Alan Cox CommitDate: 2021-01-04 22:15:42 + Honor the vm page's PG_NODUMP flag on arm and i386. Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D27949 --- sys/arm/arm/minidump_machdep.c| 3 +++ sys/i386/i386/minidump_machdep_base.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/sys/arm/arm/minidump_machdep.c b/sys/arm/arm/minidump_machdep.c index 92e15bec860b..c5f9cb58302e 100644 --- a/sys/arm/arm/minidump_machdep.c +++ b/sys/arm/arm/minidump_machdep.c @@ -68,8 +68,11 @@ static uint64_t counter, progress; static int is_dumpable(vm_paddr_t pa) { + vm_page_t m; int i; + if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) + return ((m->flags & PG_NODUMP) == 0); for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) return (1); diff --git a/sys/i386/i386/minidump_machdep_base.c b/sys/i386/i386/minidump_machdep_base.c index 9e803c6fd813..e2b4234eba11 100644 --- a/sys/i386/i386/minidump_machdep_base.c +++ b/sys/i386/i386/minidump_machdep_base.c @@ -65,8 +65,11 @@ static uint64_t counter, progress; static int is_dumpable(vm_paddr_t pa) { + vm_page_t m; int i; + if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) + return ((m->flags & PG_NODUMP) == 0); for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) return (1); ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: 5a181b8bce99 - main - Prefer the use of vm_page_domain() to vm_phys_domain().
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=5a181b8bce9958be9e3c2b3840f5a56b712c108e commit 5a181b8bce9958be9e3c2b3840f5a56b712c108e Author: Alan Cox AuthorDate: 2021-01-10 08:51:33 + Commit: Alan Cox CommitDate: 2021-01-10 19:25:33 + Prefer the use of vm_page_domain() to vm_phys_domain(). When we already have the vm page in hand, use vm_page_domain() instead of vm_phys_domain(). The former has a trivial constant-time implementation whereas the latter iterates over the mem_affinity array. Reviewed by:kib, markj Differential Revision: https://reviews.freebsd.org/D28005 --- sys/amd64/amd64/pmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 5267203d7473..2ec303d687a7 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -5259,7 +5259,7 @@ retry: pc->pc_map[0] = PC_FREE0 & ~1ul;/* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; - pvc = &pv_chunks[vm_phys_domain(m->phys_addr)]; + pvc = &pv_chunks[vm_page_domain(m)]; mtx_lock(&pvc->pvc_lock); TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru); mtx_unlock(&pvc->pvc_lock); @@ -5360,7 +5360,7 @@ retry: pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); - TAILQ_INSERT_TAIL(&new_tail[pc_to_domain(pc)], pc, pc_lru); + TAILQ_INSERT_TAIL(&new_tail[vm_page_domain(m)], pc, pc_lru); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); /* ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
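The difference between the two helpers can be shown outside the kernel: one derives the domain by scanning a table of physical ranges, the other reads a field the page already carries. The types below are illustrative stand-ins, not the vm_page or mem_affinity definitions.

/*
 * Illustrative comparison: the physical-address lookup scans a table of
 * ranges, while the page structure already records the domain it belongs to.
 */
#include <stdint.h>
#include <stdio.h>

struct affinity_range {                   /* stand-in for mem_affinity[] */
	uint64_t start, end;
	int domain;
};

struct page {                             /* stand-in for the relevant vm_page fields */
	uint64_t phys_addr;
	uint8_t domain;
};

static int
phys_domain(const struct affinity_range *ranges, int n, uint64_t pa)
{
	for (int i = 0; i < n; i++)       /* linear scan over the ranges */
		if (pa >= ranges[i].start && pa < ranges[i].end)
			return (ranges[i].domain);
	return (0);
}

static int
page_domain(const struct page *m)
{
	return (m->domain);               /* constant-time field load */
}

int
main(void)
{
	struct affinity_range r[] = {
		{ 0x000000000, 0x100000000, 0 },
		{ 0x100000000, 0x200000000, 1 },
	};
	struct page m = { 0x180000000, 1 };

	printf("%d %d\n", phys_domain(r, 2, m.phys_addr), page_domain(&m));
	return (0);
}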
git: 6f6a166eaf5e - main - arm64: Use page_to_pvh() when the vm_page_t is known
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=6f6a166eaf5e59dedb761ea6152417433a841e3b commit 6f6a166eaf5e59dedb761ea6152417433a841e3b Author: Alan Cox AuthorDate: 2021-06-21 07:45:21 + Commit: Alan Cox CommitDate: 2021-06-21 22:25:06 + arm64: Use page_to_pvh() when the vm_page_t is known When support for a sparse pv_table was added, the implementation of pa_to_pvh() changed from a simple constant-time calculation to iterating over the array vm_phys_segs[]. To mitigate this issue, an alternative function, page_to_pvh(), was introduced that still runs in constant time but requires the vm_page_t to be known. However, three cases where the vm_page_t is known were not converted to page_to_pvh(). This change converts those three cases. Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D30832 --- sys/arm64/arm64/pmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index ffc83be852bd..5f321be98528 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -3474,7 +3474,7 @@ pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, va = va & ~L2_OFFSET; pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found")); - pvh = pa_to_pvh(pa); + pvh = page_to_pvh(m); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; /* Free the remaining NPTEPG - 1 pv entries. */ @@ -3896,7 +3896,7 @@ havel3: if ((om->a.flags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || - TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) + TAILQ_EMPTY(&page_to_pvh(om)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); } else { KASSERT((orig_l3 & ATTR_AF) != 0, @@ -5000,7 +5000,7 @@ pmap_remove_pages(pmap_t pmap) case 1: pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE); - pvh = pa_to_pvh(tpte & ~ATTR_MASK); + pvh = page_to_pvh(m); TAILQ_REMOVE(&pvh->pv_list, pv,pv_next); pvh->pv_gen++; if (TAILQ_EMPTY(&pvh->pv_list)) { ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: 62ea198e95f1 - main - arm64: remove an unneeded test from pmap_clear_modify()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=62ea198e95f139e6b8041ec44f75d65aa26970d0 commit 62ea198e95f139e6b8041ec44f75d65aa26970d0 Author: Alan Cox AuthorDate: 2021-06-23 05:10:20 + Commit: Alan Cox CommitDate: 2021-06-23 19:22:46 + arm64: remove an unneeded test from pmap_clear_modify() The page table entry for a 4KB page mapping must be valid if a PV entry for the mapping exists, so there is no point in testing each page table entry's validity when iterating over a PV list. Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D30875 --- sys/arm64/arm64/pmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 5f321be98528..7def96bca70b 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -5664,8 +5664,7 @@ restart: l2 = pmap_l2(pmap, pv->pv_va); l3 = pmap_l2_to_l3(l2, pv->pv_va); oldl3 = pmap_load(l3); - if (pmap_l3_valid(oldl3) && - (oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == ATTR_SW_DBM){ + if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == ATTR_SW_DBM){ pmap_set_bits(l3, ATTR_S1_AP(ATTR_S1_AP_RO)); pmap_invalidate_page(pmap, pv->pv_va); } ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: 4c57d6d55516 - main - amd64/pmap: fix user page table page accounting
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=4c57d6d5551629df348e2087d2382ae7cbf8b312 commit 4c57d6d5551629df348e2087d2382ae7cbf8b312 Author: Alan Cox AuthorDate: 2021-12-05 23:40:53 + Commit: Alan Cox CommitDate: 2021-12-06 01:13:43 + amd64/pmap: fix user page table page accounting When a superpage mapping is destroyed and the original page table page containing 4KB mappings that was being held in reserve is deallocated, the recently introduced user page table page count was not being decremented. Consequentially, the count was wrong and would grow over time. For example, after multiple iterations of "buildworld", I was seeing implausible counts, like the following: vm.pmap.kernel_pt_page_count: 2184 vm.pmap.user_pt_page_count: 2280849 vm.pmap.pv_page_count: 106 With this change, I now see: vm.pmap.kernel_pt_page_count: 2183 vm.pmap.user_pt_page_count: 344 vm.pmap.pv_page_count: 105 Reviewed by:kib, markj Differential Revision: https://reviews.freebsd.org/D33276 --- sys/amd64/amd64/pmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index e9973a420de3..153664698e43 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -6140,7 +6140,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, if (mpte != NULL) { KASSERT(mpte->valid == VM_PAGE_BITS_ALL, ("pmap_remove_pde: pte page not promoted")); - pmap_resident_count_adj(pmap, -1); + pmap_pt_page_count_adj(pmap, -1); KASSERT(mpte->ref_count == NPTEPG, ("pmap_remove_pde: pte page ref count error")); mpte->ref_count = 0; @@ -8408,7 +8408,7 @@ pmap_remove_pages(pmap_t pmap) if (mpte != NULL) { KASSERT(mpte->valid == VM_PAGE_BITS_ALL, ("pmap_remove_pages: pte page not promoted")); - pmap_resident_count_adj(pmap, -1); + pmap_pt_page_count_adj(pmap, -1); KASSERT(mpte->ref_count == NPTEPG, ("pmap_remove_pages: pte page reference count error")); mpte->ref_count = 0;
git: b7ec0d268b73 - main - arm64: Introduce and use pmap_pte_exists()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=b7ec0d268b73ce20c4f785d21cde9b174c91a553 commit b7ec0d268b73ce20c4f785d21cde9b174c91a553 Author: Alan Cox AuthorDate: 2021-12-23 18:50:14 + Commit: Alan Cox CommitDate: 2021-12-24 04:56:02 + arm64: Introduce and use pmap_pte_exists() Use pmap_pte_exists() instead of pmap_pte() when the caller expects a mapping to exist at a particular level. The caller benefits in two ways from using pmap_pte_exists(). First, because the level is specified to pmap_pte_exists() as a constant, rather than returned, the compiler can specialize the implementation of pmap_pte_exists() to the caller's exact needs, i.e., generate fewer instructions. Consequently, within a GENERIC-NODEBUG kernel, 704 bytes worth of instructions are eliminated from the inner loops of various pmap functions. Second, suppose that the mapping doesn't exist. Rather than requiring every caller to implement its own KASSERT()s to report missing mappings, the caller can optionally have pmap_pte_exists() provide the KASSERT(). Reviewed by:andrew, kib Tested by: andrew (an earlier version) MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D33597 --- sys/arm64/arm64/pmap.c | 90 ++ 1 file changed, 55 insertions(+), 35 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index e69d0f9c1d81..0d1e604f22ef 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -596,6 +596,46 @@ pmap_pte(pmap_t pmap, vm_offset_t va, int *level) return (l3); } +/* + * If the given pmap has an L{1,2}_BLOCK or L3_PAGE entry at the specified + * level that maps the specified virtual address, then a pointer to that entry + * is returned. Otherwise, NULL is returned, unless INVARIANTS are enabled + * and a diagnostic message is provided, in which case this function panics. 
+ */ +static __always_inline pt_entry_t * +pmap_pte_exists(pmap_t pmap, vm_offset_t va, int level, const char *diag) +{ + pd_entry_t *l0p, *l1p, *l2p; + pt_entry_t desc, *l3p; + + KASSERT(level >= 0 && level < 4, + ("%s: %s passed an out-of-range level (%d)", __func__, diag, + level)); + l0p = pmap_l0(pmap, va); + desc = pmap_load(l0p) & ATTR_DESCR_MASK; + if (desc == L0_TABLE && level > 0) { + l1p = pmap_l0_to_l1(l0p, va); + desc = pmap_load(l1p) & ATTR_DESCR_MASK; + if (desc == L1_BLOCK && level == 1) + return (l1p); + else if (desc == L1_TABLE && level > 1) { + l2p = pmap_l1_to_l2(l1p, va); + desc = pmap_load(l2p) & ATTR_DESCR_MASK; + if (desc == L2_BLOCK && level == 2) + return (l2p); + else if (desc == L2_TABLE && level > 2) { + l3p = pmap_l2_to_l3(l2p, va); + desc = pmap_load(l3p) & ATTR_DESCR_MASK; + if (desc == L3_PAGE && level == 3) + return (l3p); + } + } + } + KASSERT(diag == NULL, + ("%s: va %#lx is not mapped at level %d", diag, va, level)); + return (NULL); +} + bool pmap_ps_enabled(pmap_t pmap __unused) { @@ -1483,12 +1523,8 @@ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; - int lvl; - - pte = pmap_pte(kernel_pmap, va, &lvl); - KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); - KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); + pte = pmap_pte_exists(kernel_pmap, va, 3, __func__); pmap_clear(pte); pmap_invalidate_page(kernel_pmap, va); } @@ -1498,7 +1534,6 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size) { pt_entry_t *pte; vm_offset_t va; - int lvl; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); @@ -1507,10 +1542,7 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size) va = sva; while (size != 0) { - pte = pmap_pte(kernel_pmap, va, &lvl); - KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); - KASSERT(lvl == 3, - ("Invalid device pagetable level: %d != 3", lvl)); + pte = pmap_pte_exists(kernel_pmap, va, 3, __func__); pmap_clear(pte); va += PAGE_SIZE; @@ -1584,7 +1616,6 @@ pmap_qremove(vm_offset_t sva, int count) { pt_entry_t *pte; vm
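The specialization argument can be seen in a few lines of ordinary C: when the walk is always-inline and the requested level is a literal at each call site, the compiler folds away the comparisons and loads for the other levels. The sketch below uses a fake descriptor encoding, not the arm64 page-table format.

/*
 * Illustrative sketch of why a constant "level" plus always_inline lets the
 * compiler specialize each call site.  DESC_* values are fake stand-ins.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define DESC_INVALID  0
#define DESC_TABLE    1
#define DESC_LEAF     2

static inline __attribute__((always_inline)) const uint64_t *
pte_exists_sketch(const uint64_t desc[4], int level)
{
	/* Intermediate levels must be tables in order to descend... */
	for (int l = 0; l < level; l++)
		if (desc[l] != DESC_TABLE)
			return (NULL);
	/* ...and the requested level must hold a leaf mapping. */
	return (desc[level] == DESC_LEAF ? &desc[level] : NULL);
}

int
main(void)
{
	uint64_t walk[4] = { DESC_TABLE, DESC_TABLE, DESC_TABLE, DESC_LEAF };

	/* "3" is a literal, so the loop above fully unrolls at this call site. */
	assert(pte_exists_sketch(walk, 3) == &walk[3]);
	assert(pte_exists_sketch(walk, 2) == NULL);  /* a table, not a leaf */
	return (0);
}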
git: 03f9cc89e1f5 - main - arm64: Fix "set-but-not-used" warnings in the pmap
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=03f9cc89e1f5ddf5f54785cb10f551ab94d139ac commit 03f9cc89e1f5ddf5f54785cb10f551ab94d139ac Author: Alan Cox AuthorDate: 2021-12-27 17:37:04 + Commit: Alan Cox CommitDate: 2021-12-27 17:48:15 + arm64: Fix "set-but-not-used" warnings in the pmap MFC after: 1 week --- sys/arm64/arm64/pmap.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 0d1e604f22ef..6d12f66807c3 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -2113,7 +2113,7 @@ retry: void pmap_release(pmap_t pmap) { - boolean_t rv; + boolean_t rv __diagused; struct spglist free; struct asid_set *set; vm_page_t m; @@ -2839,7 +2839,7 @@ pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags, static void pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) { - pt_entry_t newl2, oldl2; + pt_entry_t newl2, oldl2 __diagused; vm_page_t ml3; vm_paddr_t ml3pa; @@ -5376,7 +5376,7 @@ pmap_ts_referenced(vm_page_t m) pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; - pd_entry_t *pde, tpde; + pd_entry_t *pde, tpde __diagused; pt_entry_t *pte, tpte; vm_offset_t va; vm_paddr_t pa; @@ -6918,7 +6918,7 @@ pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, { vm_paddr_t paddr; boolean_t needs_mapping; - int error, i; + int error __diagused, i; /* * Allocate any KVA space that we need, this is done in a separate
git: e161dfa91897 - main - Fix pmap_is_prefaultable() on arm64 and riscv
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=e161dfa918974b4392c7c5127bd51f28ea5f8b6a commit e161dfa918974b4392c7c5127bd51f28ea5f8b6a Author: Alan Cox AuthorDate: 2021-12-25 03:54:01 + Commit: Alan Cox CommitDate: 2021-12-28 01:17:14 + Fix pmap_is_prefaultable() on arm64 and riscv The current implementations never correctly return TRUE. In all cases, when they currently return TRUE, they should have returned FALSE. And, in some cases, when they currently return FALSE, they should have returned TRUE. Except for its effects on performance, specifically, additional page faults and pointless calls to pmap_enter_quick() that abort, this error is harmless. That is why it has gone unnoticed. Add a comment to the amd64, arm64, and riscv implementations describing how their return values are computed. Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D33659 --- sys/amd64/amd64/pmap.c | 5 + sys/arm64/arm64/pmap.c | 12 +--- sys/riscv/riscv/pmap.c | 6 +- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index f6efce1303d4..42ad1bd24136 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -8567,6 +8567,11 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) boolean_t rv; PG_V = pmap_valid_bit(pmap); + + /* +* Return TRUE if and only if the PTE for the specified virtual +* address is allocated but invalid. +*/ rv = FALSE; PMAP_LOCK(pmap); pde = pmap_pde(pmap, addr); diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 6d12f66807c3..4bd3eef7a18f 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -5246,15 +5246,21 @@ pmap_is_modified(vm_page_t m) boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { + pd_entry_t *pde; pt_entry_t *pte; boolean_t rv; int lvl; + /* +* Return TRUE if and only if the L3 entry for the specified virtual +* address is allocated but invalid. +*/ rv = FALSE; PMAP_LOCK(pmap); - pte = pmap_pte(pmap, addr, &lvl); - if (pte != NULL && pmap_load(pte) != 0) { - rv = TRUE; + pde = pmap_pde(pmap, addr, &lvl); + if (pde != NULL && lvl == 2) { + pte = pmap_l2_to_l3(pde, addr); + rv = pmap_load(pte) == 0; } PMAP_UNLOCK(pmap); return (rv); diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 9abf75a731f5..1dc62418b165 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -3850,10 +3850,14 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) pt_entry_t *l3; boolean_t rv; + /* +* Return TRUE if and only if the L3 entry for the specified virtual +* address is allocated but invalid. +*/ rv = FALSE; PMAP_LOCK(pmap); l3 = pmap_l3(pmap, addr); - if (l3 != NULL && pmap_load(l3) != 0) { + if (l3 != NULL && pmap_load(l3) == 0) { rv = TRUE; } PMAP_UNLOCK(pmap);
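Stated outside the kernel, the corrected predicate is: the last-level PTE slot must exist (its page table page is allocated) yet hold no valid mapping. A minimal user-space sketch, with a plain pointer standing in for the looked-up L3 entry:

/*
 * Minimal sketch of the corrected predicate: prefaulting makes sense only
 * when the L3 slot exists but currently holds no valid mapping.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static bool
is_prefaultable_sketch(const uint64_t *l3_slot)
{
	if (l3_slot == NULL)              /* no L3 page table page allocated */
		return (false);
	return (*l3_slot == 0);           /* slot exists but is invalid */
}

int
main(void)
{
	uint64_t empty = 0, mapped = 0x80000003;

	printf("%d %d %d\n",
	    is_prefaultable_sketch(NULL),       /* 0 */
	    is_prefaultable_sketch(&empty),     /* 1 */
	    is_prefaultable_sketch(&mapped));   /* 0 */
	return (0);
}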
git: 3c2ee7b28cfd - main - arm64: Enhance pmap_pte_exists()'s error reporting
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=3c2ee7b28cfd715e28e72d76efd89ba3c38aa970 commit 3c2ee7b28cfd715e28e72d76efd89ba3c38aa970 Author: Alan Cox AuthorDate: 2021-12-28 23:17:42 + Commit: Alan Cox CommitDate: 2021-12-28 23:46:21 + arm64: Enhance pmap_pte_exists()'s error reporting Report the descriptor type and level at which the page table does not match the caller's expectations. MFC after: 1 week --- sys/arm64/arm64/pmap.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 4bd3eef7a18f..15bb813f591d 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -607,6 +607,7 @@ pmap_pte_exists(pmap_t pmap, vm_offset_t va, int level, const char *diag) { pd_entry_t *l0p, *l1p, *l2p; pt_entry_t desc, *l3p; + int walk_level __diagused; KASSERT(level >= 0 && level < 4, ("%s: %s passed an out-of-range level (%d)", __func__, diag, @@ -628,11 +629,17 @@ pmap_pte_exists(pmap_t pmap, vm_offset_t va, int level, const char *diag) desc = pmap_load(l3p) & ATTR_DESCR_MASK; if (desc == L3_PAGE && level == 3) return (l3p); - } - } - } + else + walk_level = 3; + } else + walk_level = 2; + } else + walk_level = 1; + } else + walk_level = 0; KASSERT(diag == NULL, - ("%s: va %#lx is not mapped at level %d", diag, va, level)); + ("%s: va %#lx not mapped at level %d, desc %ld at level %d", + diag, va, level, desc, walk_level)); return (NULL); }
git: 24b82aa0c543 - main - arm64: Simplify pmap_ts_referenced
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=24b82aa0c543cc6d63bfbde651b2325ae360dc50 commit 24b82aa0c543cc6d63bfbde651b2325ae360dc50 Author: Alan Cox AuthorDate: 2021-12-28 00:27:52 + Commit: Alan Cox CommitDate: 2021-12-28 23:59:39 + arm64: Simplify pmap_ts_referenced Use pmap_pte_exists() in place of multiple KASSERT()s. Eliminate an unnecessary NULL check. MFC after: 1 week --- sys/arm64/arm64/pmap.c | 26 +- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 15bb813f591d..68164708dce9 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -5389,11 +5389,10 @@ pmap_ts_referenced(vm_page_t m) pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; - pd_entry_t *pde, tpde __diagused; pt_entry_t *pte, tpte; vm_offset_t va; vm_paddr_t pa; - int cleared, lvl, md_gen, not_cleared, pvh_gen; + int cleared, md_gen, not_cleared, pvh_gen; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, @@ -5424,14 +5423,7 @@ retry: } } va = pv->pv_va; - pde = pmap_pde(pmap, va, &lvl); - KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found")); - KASSERT(lvl == 1, - ("pmap_ts_referenced: invalid pde level %d", lvl)); - tpde = pmap_load(pde); - KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE, - ("pmap_ts_referenced: found an invalid l1 table")); - pte = pmap_l1_to_l2(pde, va); + pte = pmap_pte_exists(pmap, va, 2, __func__); tpte = pmap_load(pte); if (pmap_pte_dirty(pmap, tpte)) { /* @@ -5441,7 +5433,6 @@ retry: */ vm_page_dirty(m); } - if ((tpte & ATTR_AF) != 0) { /* * Since this reference bit is shared by 512 4KB pages, @@ -5472,7 +5463,7 @@ retry: } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ - if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { + if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; @@ -5499,14 +5490,7 @@ small_mappings: goto retry; } } - pde = pmap_pde(pmap, pv->pv_va, &lvl); - KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); - KASSERT(lvl == 2, - ("pmap_ts_referenced: invalid pde level %d", lvl)); - tpde = pmap_load(pde); - KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, - ("pmap_ts_referenced: found an invalid l2 table")); - pte = pmap_l2_to_l3(pde, pv->pv_va); + pte = pmap_pte_exists(pmap, pv->pv_va, 3, __func__); tpte = pmap_load(pte); if (pmap_pte_dirty(pmap, tpte)) vm_page_dirty(m); @@ -5520,7 +5504,7 @@ small_mappings: } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ - if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { + if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++;
git: 5d1ee799de65 - main - arm64 pmap: Eliminate an unused global variable
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=5d1ee799de65ca62cd94c1602b41255bdbc3312d commit 5d1ee799de65ca62cd94c1602b41255bdbc3312d Author: Alan Cox AuthorDate: 2023-05-27 06:23:48 + Commit: Alan Cox CommitDate: 2023-05-27 06:38:20 + arm64 pmap: Eliminate an unused global variable The global variable "pmap_last_pa" was copied from the amd64 pmap as a part of commit c15085278cb5 "arm64 pmap: implement per-superpage locks" but it is neither used nor needed by the arm64 pmap. --- sys/arm64/arm64/pmap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 16e671295ca6..6bc9adba71e0 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -341,7 +341,6 @@ struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM]; __exclusive_cache_line static struct pmap_large_md_page pv_dummy_large; #define pv_dummy pv_dummy_large.pv_page __read_mostly static struct pmap_large_md_page *pv_table; -__read_mostly vm_paddr_t pmap_last_pa; vm_paddr_t dmap_phys_base; /* The start of the dmap region */ vm_paddr_t dmap_phys_max; /* The limit of the dmap region */
git: 3e7e2bb2467e - main - arm64 pmap: Make VM_PAGE_TO_PV_LIST_LOCK() a constant-time operation
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=3e7e2bb2467e8bb682176125397168c88c3913c6 commit 3e7e2bb2467e8bb682176125397168c88c3913c6 Author: Alan Cox AuthorDate: 2023-05-29 06:01:37 + Commit: Alan Cox CommitDate: 2023-05-29 16:22:55 + arm64 pmap: Make VM_PAGE_TO_PV_LIST_LOCK() a constant-time operation The prior implementation of VM_PAGE_TO_PV_LIST_LOCK() performed a linear-time search of the vm_phys_segs[] array. However, in contrast to PHYS_TO_PV_LIST_LOCK(), that search is unnecessary because every (non- fictitious) vm_page contains the index of the vm_phys_seg in which it resides. Change most of the remaining uses of CHANGE_PV_LIST_LOCK_TO_PHYS() and PHYS_TO_PV_LIST_LOCK() to CHANGE_PV_LIST_LOCK_TO_VM_PAGE() and VM_PAGE_TO_PV_LIST_LOCK(), respectively. Collectively, these changes also reduce the size of a GENERIC-NODEBUG kernel's pmap. Before: text databss dec hex filename 70144 3200 2248 75592 0x12748 pmap.o After: text databss dec hex filename 69192 3200 2248 74640 0x12390 pmap.o Reviewed by:kib, markj Differential Revision: https://reviews.freebsd.org/D40306 --- sys/arm64/arm64/pmap.c | 43 ++- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 6bc9adba71e0..150532b68c75 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -202,6 +202,10 @@ struct pmap_large_md_page { int pv_pad[2]; }; +__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large; +#define pv_dummy pv_dummy_large.pv_page +__read_mostly static struct pmap_large_md_page *pv_table; + static struct pmap_large_md_page * _pa_to_pmdp(vm_paddr_t pa) { @@ -252,11 +256,19 @@ page_to_pmdp(vm_page_t m) _lock; \ }) -#defineCHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do {\ +static struct rwlock * +VM_PAGE_TO_PV_LIST_LOCK(vm_page_t m) +{ + if ((m->flags & PG_FICTITIOUS) == 0) + return (&page_to_pmdp(m)->pv_lock); + else + return (&pv_dummy_large.pv_lock); +} + +#defineCHANGE_PV_LIST_LOCK(lockp, new_lock)do {\ struct rwlock **_lockp = (lockp); \ - struct rwlock *_new_lock; \ + struct rwlock *_new_lock = (new_lock); \ \ - _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL)\ rw_wunlock(*_lockp);\ @@ -265,8 +277,11 @@ page_to_pmdp(vm_page_t m) } \ } while (0) +#defineCHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) \ + CHANGE_PV_LIST_LOCK(lockp, PHYS_TO_PV_LIST_LOCK(pa)) + #defineCHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)\ - CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) + CHANGE_PV_LIST_LOCK(lockp, VM_PAGE_TO_PV_LIST_LOCK(m)) #defineRELEASE_PV_LIST_LOCK(lockp) do {\ struct rwlock **_lockp = (lockp); \ @@ -277,9 +292,6 @@ page_to_pmdp(vm_page_t m) } \ } while (0) -#defineVM_PAGE_TO_PV_LIST_LOCK(m) \ - PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) - /* * The presence of this flag indicates that the mapping is writeable. 
* If the ATTR_S1_AP_RO bit is also set, then the mapping is clean, otherwise @@ -338,10 +350,6 @@ struct pv_chunks_list { struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM]; -__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large; -#define pv_dummy pv_dummy_large.pv_page -__read_mostly static struct pmap_large_md_page *pv_table; - vm_paddr_t dmap_phys_base; /* The start of the dmap region */ vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ @@ -3427,7 +3435,7 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, if (old_l2 & ATTR_SW_MANAGED) { m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(old_l2)); pvh = page_to_pvh(m); - CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, PTE_TO_PHYS(old_l2)); + CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(pvh, pmap, sva); for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) { if (pmap_pte_dirty(pmap, old_l2)) @@ -3533,7 +3541,7 @@ pmap_remove_l3
git: 8d7ee2047c5e - main - pmap: don't recompute mpte during promotion
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=8d7ee2047c5e8b4db51c682aee4161ebfd1238e5 commit 8d7ee2047c5e8b4db51c682aee4161ebfd1238e5 Author: Alan Cox AuthorDate: 2022-09-09 23:34:58 + Commit: Alan Cox CommitDate: 2022-09-11 06:19:22 + pmap: don't recompute mpte during promotion When attempting to promote 4KB user-space mappings to a 2MB user-space mapping, the address of the struct vm_page representing the page table page that contains the 4KB mappings is already known to the caller. Pass that address to the promotion function rather than making the promotion function recompute it, which on arm64 entails iteration over the vm_phys_segs array by PHYS_TO_VM_PAGE(). And, while I'm here, eliminate unnecessary arithmetic from the calculation of the first PTE's address on arm64. MFC after: 1 week --- sys/amd64/amd64/pmap.c | 12 ++-- sys/arm64/arm64/pmap.c | 14 ++ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 326103a1affb..e3f281784893 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1277,7 +1277,7 @@ static vm_page_t pmap_large_map_getptp_unlocked(void); static vm_paddr_t pmap_large_map_kextract(vm_offset_t va); #if VM_NRESERVLEVEL > 0 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, -struct rwlock **lockp); +vm_page_t mpte, struct rwlock **lockp); #endif static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); @@ -6737,13 +6737,12 @@ pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde) * identical characteristics. */ static void -pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, +pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, struct rwlock **lockp) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V, PG_PKU_MASK; - vm_page_t mpte; int PG_PTE_CACHE; PG_A = pmap_accessed_bit(pmap); @@ -6823,7 +6822,8 @@ setpte: * mapping the superpage is demoted by pmap_demote_pde() or * destroyed by pmap_remove_pde(). */ - mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); + if (mpte == NULL) + mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_pde: page table page is out of range")); @@ -7237,7 +7237,7 @@ unchanged: pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) - pmap_promote_pde(pmap, pde, va, &lock); + pmap_promote_pde(pmap, pde, va, mpte, &lock); #endif rv = KERN_SUCCESS; @@ -10183,7 +10183,7 @@ pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype) pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) { - pmap_promote_pde(pmap, pde, va, &lock); + pmap_promote_pde(pmap, pde, va, mpte, &lock); #ifdef INVARIANTS atomic_add_long(&ad_emulation_superpage_promotions, 1); #endif diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index deea00bc5d13..c86e9f562729 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -3787,18 +3787,15 @@ pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, * identical characteristics. 
*/ static void -pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, +pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte, struct rwlock **lockp) { pt_entry_t *firstl3, *l3, newl2, oldl3, pa; - vm_page_t mpte; - vm_offset_t sva; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PMAP_ASSERT_STAGE1(pmap); - sva = va & ~L2_OFFSET; - firstl3 = pmap_l2_to_l3(l2, sva); + firstl3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); newl2 = pmap_load(firstl3); if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF || @@ -3851,7 +3848,8 @@ setl3: * mapping the superpage is demoted by pmap_demote_l2() or * destroyed by pmap_remove_l3(). */ - mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); + if (mpte == NULL) + mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_l2: page table page is out of range")); @@ -3871,7 +3869,7 @@ setl3: newl2 &am
git: 1d5ebad06c20 - main - pmap: optimize MADV_WILLNEED on existing superpages
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=1d5ebad06c20b1aed3b0c323c4675678afec5e55 commit 1d5ebad06c20b1aed3b0c323c4675678afec5e55 Author: Alan Cox AuthorDate: 2022-09-30 06:54:02 + Commit: Alan Cox CommitDate: 2022-09-30 17:14:05 + pmap: optimize MADV_WILLNEED on existing superpages Specifically, avoid pointless calls to pmap_enter_quick_locked() when madvise(MADV_WILLNEED) is applied to an existing superpage mapping. Reported by:mhorne Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D36801 --- sys/amd64/amd64/pmap.c | 64 +++--- sys/arm64/arm64/pmap.c | 59 +++--- 2 files changed, 75 insertions(+), 48 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index f4df664f0cca..b9b031d55d7d 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1258,7 +1258,7 @@ static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va); -static boolpmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, +static int pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp); static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, vm_page_t m, struct rwlock **lockp); @@ -7271,13 +7271,12 @@ out: } /* - * Tries to create a read- and/or execute-only 2MB page mapping. Returns true - * if successful. Returns false if (1) a page table page cannot be allocated - * without sleeping, (2) a mapping already exists at the specified virtual - * address, or (3) a PV entry cannot be allocated without reclaiming another - * PV entry. + * Tries to create a read- and/or execute-only 2MB page mapping. Returns + * KERN_SUCCESS if the mapping was created. Otherwise, returns an error + * value. See pmap_enter_pde() for the possible error values when "no sleep", + * "no replace", and "no reclaim" are specified. */ -static bool +static int pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp) { @@ -7295,8 +7294,7 @@ pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if (va < VM_MAXUSER_ADDRESS) newpde |= PG_U; return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP | - PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == - KERN_SUCCESS); + PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp)); } /* @@ -7319,12 +7317,19 @@ pmap_every_pte_zero(vm_paddr_t pa) /* * Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if - * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE - * otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and - * a mapping already exists at the specified virtual address. Returns - * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table - * page allocation failed. Returns KERN_RESOURCE_SHORTAGE if - * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. + * the mapping was created, and one of KERN_FAILURE, KERN_NO_SPACE, + * KERN_PROTECTION_FAILURE, or KERN_RESOURCE_FAILURE otherwise. Returns + * KERN_FAILURE if either (1) PMAP_ENTER_NOREPLACE was specified and a 4KB + * page mapping already exists within the 2MB virtual address range starting + * at the specified virtual address or (2) the requested 2MB page mapping is + * not supported due to hardware errata. 
Returns KERN_NO_SPACE if + * PMAP_ENTER_NOREPLACE was specified and a 2MB page mapping already exists at + * the specified virtual address. Returns KERN_PROTECTION_FAILURE if the PKRU + * settings are not the same across the 2MB virtual address range starting at + * the specified virtual address. Returns KERN_RESOURCE_SHORTAGE if either + * (1) PMAP_ENTER_NOSLEEP was specified and a page table page allocation + * failed or (2) PMAP_ENTER_NORECLAIM was specified and a PV entry allocation + * failed. * * The parameter "m" is only used when creating a managed, writeable mapping. */ @@ -7380,14 +7385,23 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, if ((oldpde & PG_V) != 0) { KASSERT(pdpg == NULL || pdpg->ref_count > 1, ("pmap_enter_pde: pdpg's reference count is too low")); - if ((flags & PMAP_ENTER_NOREPLACE) != 0 && (va < - VM_MAXUSER_ADDRESS || (oldpde & PG_PS) != 0 || - !pmap_every_pte_zero(oldpde & PG_FRAME)))
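One way to read the new return-value contract is from the caller's side: a KERN_NO_SPACE result means a superpage already covers the range, so the object-walking loop can advance a full superpage rather than attempting hundreds of pointless 4KB entries. The sketch below is illustrative only; the KERN_* numbers are assumed Mach-style values and NPTEPG is the amd64 count of 4KB pages per 2MB superpage.

/*
 * Caller-side sketch: success, or "a superpage is already there", lets the
 * loop advance a whole superpage; any other result falls back to a single
 * 4KB attempt.
 */
#include <stdio.h>

#define KERN_SUCCESS   0
#define KERN_NO_SPACE  3
#define KERN_FAILURE   5
#define NPTEPG         512

static int
pages_to_advance(int rv)
{
	if (rv == KERN_SUCCESS || rv == KERN_NO_SPACE)
		return (NPTEPG);          /* skip past the entire 2MB range */
	return (1);                       /* retry this page with a 4KB mapping */
}

int
main(void)
{
	printf("%d %d %d\n", pages_to_advance(KERN_SUCCESS),
	    pages_to_advance(KERN_NO_SPACE), pages_to_advance(KERN_FAILURE));
	return (0);
}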
git: f0878da03b37 - main - pmap: standardize promotion conditions between amd64 and arm64
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=f0878da03b374e3fa3578b363f02bfd50ac0e5bd commit f0878da03b374e3fa3578b363f02bfd50ac0e5bd Author: Alan Cox AuthorDate: 2022-10-08 07:20:25 + Commit: Alan Cox CommitDate: 2022-12-12 17:32:50 + pmap: standardize promotion conditions between amd64 and arm64 On amd64, don't abort promotion due to a missing accessed bit in a mapping before possibly write protecting that mapping. Previously, in some cases, we might not repromote after madvise(MADV_FREE) because there was no write fault to trigger the repromotion. Conversely, on arm64, don't pointlessly, yet harmlessly, write protect physical pages that aren't part of the physical superpage. Don't count aborted promotions due to explicit promotion prohibition (arm64) or hardware errata (amd64) as ordinary promotion failures. Reviewed by:kib, markj MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D36916 --- sys/amd64/amd64/pmap.c | 37 ++--- sys/arm64/arm64/pmap.c | 50 -- 2 files changed, 74 insertions(+), 13 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index eb8980ae4fed..a44993efb409 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -6771,19 +6771,36 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, /* * Examine the first PTE in the specified PTP. Abort if this PTE is -* either invalid, unused, or does not map the first 4KB physical page -* within a 2MB page. +* ineligible for promotion due to hardware errata, invalid, or does +* not map the first 4KB physical page within a 2MB page. */ firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); newpde = *firstpte; - if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) || - !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, - newpde))) { + if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, newpde))) + return; + if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) { counter_u64_add(pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } + + /* +* Both here and in the below "for" loop, to allow for repromotion +* after MADV_FREE, conditionally write protect a clean PTE before +* possibly aborting the promotion due to other PTE attributes. Why? +* Suppose that MADV_FREE is applied to a part of a superpage, the +* address range [S, E). pmap_advise() will demote the superpage +* mapping, destroy the 4KB page mapping at the end of [S, E), and +* clear PG_M and PG_A in the PTEs for the rest of [S, E). Later, +* imagine that the memory in [S, E) is recycled, but the last 4KB +* page in [S, E) is not the last to be rewritten, or simply accessed. +* In other words, there is still a 4KB page in [S, E), call it P, +* that is writeable but PG_M and PG_A are clear in P's PTE. Unless +* we write protect P before aborting the promotion, if and when P is +* finally rewritten, there won't be a page fault to trigger +* repromotion. +*/ setpde: if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* @@ -6794,16 +6811,22 @@ setpde: goto setpde; newpde &= ~PG_RW; } + if ((newpde & PG_A) == 0) { + counter_u64_add(pmap_pde_p_failures, 1); + CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" + " in pmap %p", va, pmap); + return; + } /* * Examine each of the other PTEs in the specified PTP. Abort if this * PTE maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE. 
*/ - pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; + pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { oldpte = *pte; - if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { + if ((oldpte & (PG_FRAME | PG_V)) != pa) { counter_u64_add(pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 3f46
git: 4ccd6c137f5b - main - arm64: Implement final level only TLB invalidations
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=4ccd6c137f5b53361efe54b78b815c7902258572 commit 4ccd6c137f5b53361efe54b78b815c7902258572 Author: Alan Cox AuthorDate: 2021-12-29 07:50:05 + Commit: Alan Cox CommitDate: 2022-01-03 19:14:18 + arm64: Implement final level only TLB invalidations A feature of arm64's instruction for TLB invalidation is the ability to determine whether cached intermediate entries, i.e., L{0,1,2}_TABLE entries, are invalidated in addition to the final entry, e.g., an L3_PAGE entry. Update pmap_invalidate_{page,range}() to support both types of invalidation, allowing the caller to determine which type of invalidation is performed. Update the callers to request the appropriate type of invalidation. Eliminate redundant TLB invalidations in pmap_abort_ptp() and pmap_remove_l3_range(). Add a comment to pmap_invalidate_all() making clear that it always invalidates entries at all levels. As expected, these changes result in a tiny yet measurable performance improvement. Reviewed by:kib, markj MFC after: 3 weeks Differential Revision: https://reviews.freebsd.org/D33705 --- sys/arm64/arm64/pmap.c | 144 +++-- 1 file changed, 92 insertions(+), 52 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 68164708dce9..130d4a255286 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -1223,10 +1223,35 @@ SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, &pmap_l2_promotions, 0, "2MB page promotions"); /* - * Invalidate a single TLB entry. + * If the given value for "final_only" is false, then any cached intermediate- + * level entries, i.e., L{0,1,2}_TABLE entries, are invalidated in addition to + * any cached final-level entry, i.e., either an L{1,2}_BLOCK or L3_PAGE entry. + * Otherwise, just the cached final-level entry is invalidated. */ static __inline void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +pmap_invalidate_kernel(uint64_t r, bool final_only) +{ + if (final_only) + __asm __volatile("tlbi vaale1is, %0" : : "r" (r)); + else + __asm __volatile("tlbi vaae1is, %0" : : "r" (r)); +} + +static __inline void +pmap_invalidate_user(uint64_t r, bool final_only) +{ + if (final_only) + __asm __volatile("tlbi vale1is, %0" : : "r" (r)); + else + __asm __volatile("tlbi vae1is, %0" : : "r" (r)); +} + +/* + * Invalidates any cached final- and optionally intermediate-level TLB entries + * for the specified virtual address in the given virtual address space. + */ +static __inline void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only) { uint64_t r; @@ -1235,17 +1260,22 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) dsb(ishst); if (pmap == kernel_pmap) { r = atop(va); - __asm __volatile("tlbi vaae1is, %0" : : "r" (r)); + pmap_invalidate_kernel(r, final_only); } else { r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) | atop(va); - __asm __volatile("tlbi vae1is, %0" : : "r" (r)); + pmap_invalidate_user(r, final_only); } dsb(ish); isb(); } +/* + * Invalidates any cached final- and optionally intermediate-level TLB entries + * for the specified virtual address range in the given virtual address space. 
+ */ static __inline void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, +bool final_only) { uint64_t end, r, start; @@ -1256,18 +1286,22 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) start = atop(sva); end = atop(eva); for (r = start; r < end; r++) - __asm __volatile("tlbi vaae1is, %0" : : "r" (r)); + pmap_invalidate_kernel(r, final_only); } else { start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)); start |= atop(sva); end |= atop(eva); for (r = start; r < end; r++) - __asm __volatile("tlbi vae1is, %0" : : "r" (r)); + pmap_invalidate_user(r, final_only); } dsb(ish); isb(); } +/* + * Invalidates all cached intermediate- and final-level TLB entries for the + * given virtual address space. + */ static __inline void pmap_invalidate_all(pmap_t pmap) { @@ -1513,7 +1547,7 @
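For quick reference, the four TLBI forms map onto the two axes the new interface exposes: kernel vs. user address space, and final-level-only vs. all levels. The sketch below returns the mnemonic as a string in place of the inline assembly; it is illustrative only, and the surrounding barriers (dsb/isb) and operand encoding are omitted.

/*
 * Illustrative mapping from (address space, final_only) to the TLBI form
 * used by the new pmap_invalidate_{page,range}() interface.
 */
#include <stdbool.h>
#include <stdio.h>

static const char *
tlbi_op(bool kernel, bool final_only)
{
	if (kernel)
		return (final_only ? "tlbi vaale1is" : "tlbi vaae1is");
	return (final_only ? "tlbi vale1is" : "tlbi vae1is");
}

int
main(void)
{
	/* Removing one 4KB mapping leaves the L0-L2 table entries intact. */
	printf("kernel, leaf only : %s\n", tlbi_op(true, true));
	/* Demotion or freeing a page table page also changes table entries. */
	printf("kernel, all levels: %s\n", tlbi_op(true, false));
	printf("user,   leaf only : %s\n", tlbi_op(false, true));
	printf("user,   all levels: %s\n", tlbi_op(false, false));
	return (0);
}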
git: 0c188c06c627 - main - arm64: replace pa_to_pvh() with page_to_pvh() in pmap_remove_l2()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=0c188c06c627b5de30b7cde00d071a80ecfa commit 0c188c06c627b5de30b7cde00d071a80ecfa Author: Alan Cox AuthorDate: 2021-06-23 19:14:31 + Commit: Alan Cox CommitDate: 2021-06-24 03:35:46 + arm64: replace pa_to_pvh() with page_to_pvh() in pmap_remove_l2() Revise pmap_remove_l2() to use the constant-time function page_to_pvh() instead of the linear-time function pa_to_pvh(). Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D30876 --- sys/arm64/arm64/pmap.c | 18 -- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 7def96bca70b..bc3d4fd6446b 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -2834,8 +2834,7 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, { struct md_page *pvh; pt_entry_t old_l2; - vm_offset_t eva, va; - vm_page_t m, ml3; + vm_page_t m, ml3, mt; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned")); @@ -2853,19 +2852,18 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE; pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE); if (old_l2 & ATTR_SW_MANAGED) { + m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK); + pvh = page_to_pvh(m); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, old_l2 & ~ATTR_MASK); - pvh = pa_to_pvh(old_l2 & ~ATTR_MASK); pmap_pvh_free(pvh, pmap, sva); - eva = sva + L2_SIZE; - for (va = sva, m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK); - va < eva; va += PAGE_SIZE, m++) { + for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) { if (pmap_pte_dirty(pmap, old_l2)) - vm_page_dirty(m); + vm_page_dirty(mt); if (old_l2 & ATTR_AF) - vm_page_aflag_set(m, PGA_REFERENCED); - if (TAILQ_EMPTY(&m->md.pv_list) && + vm_page_aflag_set(mt, PGA_REFERENCED); + if (TAILQ_EMPTY(&mt->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) - vm_page_aflag_clear(m, PGA_WRITEABLE); + vm_page_aflag_clear(mt, PGA_WRITEABLE); } } if (pmap == kernel_pmap) { ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: c94249decd16 - main - arm64: make it possible to define PV_STATS
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=c94249decd16de71a00d837ee132954d9f259e49 commit c94249decd16de71a00d837ee132954d9f259e49 Author: Alan Cox AuthorDate: 2021-06-24 23:09:23 + Commit: Alan Cox CommitDate: 2021-06-24 23:32:56 + arm64: make it possible to define PV_STATS Remove an #if 0 that results in a compilation error if PV_STATS is defined. Aside from this #if 0, there is nothing wrong with the PV_STATS code. MFC after: 1 week --- sys/arm64/arm64/pmap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index bc3d4fd6446b..a6f716370810 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -2226,7 +2226,6 @@ pv_to_chunk(pv_entry_t pv) static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; -#if 0 #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; @@ -2251,7 +2250,6 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif -#endif /* 0 */ /* * We are in a serious low memory condition. Resort to ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: 5dd84e315a9f - main - arm64: fix a potential KVA leak in pmap_demote_l1()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=5dd84e315a9f72017f9f628aa67f08a6493a commit 5dd84e315a9f72017f9f628aa67f08a6493a Author: Alan Cox AuthorDate: 2021-06-26 03:29:38 + Commit: Alan Cox CommitDate: 2021-06-26 04:01:32 + arm64: fix a potential KVA leak in pmap_demote_l1() In the unlikely event that the 1 GB page mapping being demoted is used to access the L1 page table page containing the 1 GB page mapping and the vm_page_alloc() to allocate a new L2 page table page fails, we would leak a page of kernel virtual address space. Fix this leak. MFC after: 1 week --- sys/arm64/arm64/pmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index a6f716370810..76ca8eab70ff 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -6010,7 +6010,8 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx" " in pmap %p", va, pmap); - return (NULL); + l2 = NULL; + goto fail; } l2phys = VM_PAGE_TO_PHYS(ml2); @@ -6039,6 +6040,7 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE); +fail: if (tmpl1 != 0) { pmap_kremove(tmpl1); kva_free(tmpl1, PAGE_SIZE); ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
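The shape of the fix is the classic single-exit cleanup pattern: once the temporary mapping has been set up, every failure path must funnel through a label that tears it down. A userspace analogue with invented names, where malloc/strdup stand in for the temporary KVA and the L2 page table page allocation:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Sketch only; not kernel code. */
char *
demote_sketch(const char *src)
{
        char *copy, *tmp;

        copy = NULL;
        if ((tmp = malloc(64)) == NULL)         /* stands in for tmpl1 */
                return (NULL);
        if ((copy = strdup(src)) == NULL)       /* stands in for the L2 page */
                goto fail;                      /* was "return (NULL)", leaking tmp */
        (void)snprintf(tmp, 64, "%s", copy);    /* work done via the temporary */
fail:
        free(tmp);                              /* released on every path */
        return (copy);
}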
git: 19c288b3a664 - main - arm64: eliminate a duplicated #define
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=19c288b3a6640742ab45200031661fe5be710d7f commit 19c288b3a6640742ab45200031661fe5be710d7f Author: Alan Cox AuthorDate: 2021-06-27 06:40:23 + Commit: Alan Cox CommitDate: 2021-06-27 06:44:58 + arm64: eliminate a duplicated #define --- sys/arm64/include/pte.h | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/arm64/include/pte.h b/sys/arm64/include/pte.h index 16a72be65fd0..5d4412d2c141 100644 --- a/sys/arm64/include/pte.h +++ b/sys/arm64/include/pte.h @@ -131,7 +131,6 @@ typedef uint64_tpt_entry_t; /* page table entry */ #defineL3_SHIFT12 #defineL3_SIZE (1 << L3_SHIFT) #defineL3_OFFSET (L3_SIZE - 1) -#defineL3_SHIFT12 #defineL3_INVAL0x0 /* 0x1 is reserved */ /* 0x2 also marks an invalid address */ ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: 26a357245f21 - main - arm64: a few simplifications to pmap_remove_{all,write}
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=26a357245f2197eea4dbbae0956d5c71ef8ba4f1 commit 26a357245f2197eea4dbbae0956d5c71ef8ba4f1 Author: Alan Cox AuthorDate: 2021-06-29 02:57:04 + Commit: Alan Cox CommitDate: 2021-06-29 03:21:24 + arm64: a few simplications to pmap_remove_{all,write} Eliminate some unnecessary unlocking and relocking when we have to retry the operation to avoid deadlock. (All of the other pmap functions that iterate over a PV list already implemented retries without these same unlocking and relocking operations.) Avoid a pointer dereference by using an existing local variable that already holds the desired value. Eliminate some unnecessary repetition of code on a failed fcmpset. Specifically, there is no point in retesting the DBM bit because it cannot change state while the pmap lock is held. Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D30931 --- sys/arm64/arm64/pmap.c | 20 +++- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 76ca8eab70ff..79b9d20231aa 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -3130,8 +3130,8 @@ pmap_remove_all(vm_page_t m) SLIST_INIT(&free); lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : page_to_pvh(m); -retry: rw_wlock(lock); +retry: while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { @@ -3140,7 +3140,6 @@ retry: PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { - rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; } @@ -3151,7 +3150,6 @@ retry: ("pmap_remove_all: no page table entry found")); KASSERT(lvl == 2, ("pmap_remove_all: invalid pte level %d", lvl)); - pmap_demote_l2_locked(pmap, pte, va, &lock); PMAP_UNLOCK(pmap); } @@ -3165,7 +3163,6 @@ retry: PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { - rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; } @@ -5224,8 +5221,8 @@ pmap_remove_write(vm_page_t m) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : page_to_pvh(m); -retry_pv_loop: rw_wlock(lock); +retry: TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); PMAP_ASSERT_STAGE1(pmap); @@ -5236,12 +5233,11 @@ retry_pv_loop: rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); - rw_wunlock(lock); - goto retry_pv_loop; + goto retry; } } va = pv->pv_va; - pte = pmap_pte(pmap, pv->pv_va, &lvl); + pte = pmap_pte(pmap, va, &lvl); if ((pmap_load(pte) & ATTR_SW_DBM) != 0) (void)pmap_demote_l2_locked(pmap, pte, va, &lock); KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), @@ -5261,17 +5257,15 @@ retry_pv_loop: if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); - rw_wunlock(lock); - goto retry_pv_loop; + goto retry; } } pte = pmap_pte(pmap, pv->pv_va, &lvl); oldpte = pmap_load(pte); -retry: if ((oldpte & ATTR_SW_DBM) != 0) { - if (!atomic_fcmpset_long(pte, &oldpte, + while (!atomic_fcmpset_64(pte, &oldpte, (oldpte | ATTR_S1_AP_RW_BIT) & ~ATTR_SW_DBM)) - goto retry; + cpu_spinwait(); if ((oldpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW)) vm_page_dirty(m); ___ dev-commits
git: 1a8bcf30f97e - main - amd64: a simplification to pmap_remove_{all,write}
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=1a8bcf30f97e6153def2af781db2fe54f5c0d106 commit 1a8bcf30f97e6153def2af781db2fe54f5c0d106 Author: Alan Cox AuthorDate: 2021-06-30 05:59:21 + Commit: Alan Cox CommitDate: 2021-06-30 18:12:25 + amd64: a simplication to pmap_remove_{all,write} Eliminate some unnecessary unlocking and relocking when we have to retry the operation to avoid deadlock. (All of the other pmap functions that iterate over a PV list already implemented retries without these same unlocking and relocking operations.) Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D30951 --- sys/amd64/amd64/pmap.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index a4046cc1f687..ea017b8a61a8 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -6307,8 +6307,8 @@ pmap_remove_all(vm_page_t m) lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); -retry: rw_wlock(lock); +retry: while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { @@ -6317,7 +6317,6 @@ retry: PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { - rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; } @@ -6336,7 +6335,6 @@ retry: PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { - rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; } @@ -8460,8 +8458,8 @@ pmap_remove_write(vm_page_t m) lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); -retry_pv_loop: rw_wlock(lock); +retry_pv_loop: TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { @@ -8471,7 +8469,6 @@ retry_pv_loop: rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); - rw_wunlock(lock); goto retry_pv_loop; } } @@ -8496,7 +8493,6 @@ retry_pv_loop: if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); - rw_wunlock(lock); goto retry_pv_loop; } } ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: e41fde3ed71c - main - On a failed fcmpset don't pointlessly repeat tests
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=e41fde3ed71c1e4fce81eac002c9f5b0926e6c49 commit e41fde3ed71c1e4fce81eac002c9f5b0926e6c49 Author: Alan Cox AuthorDate: 2021-07-04 05:20:42 + Commit: Alan Cox CommitDate: 2021-07-06 02:07:40 + On a failed fcmpset don't pointlessly repeat tests In a few places, on a failed compare-and-set, both the amd64 pmap and the arm64 pmap repeat tests on bits that won't change state while the pmap is locked. Eliminate some of these unnecessary tests. Reviewed by:andrew, kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D31014 --- sys/amd64/amd64/pmap.c | 11 +-- sys/arm64/arm64/pmap.c | 15 --- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index ea017b8a61a8..5e0b6d76ae0a 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -8459,7 +8459,7 @@ pmap_remove_write(vm_page_t m) pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); rw_wlock(lock); -retry_pv_loop: +retry: TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { @@ -8469,7 +8469,7 @@ retry_pv_loop: rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); - goto retry_pv_loop; + goto retry; } } PG_RW = pmap_rw_bit(pmap); @@ -8493,7 +8493,7 @@ retry_pv_loop: if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); - goto retry_pv_loop; + goto retry; } } PG_M = pmap_modified_bit(pmap); @@ -8503,12 +8503,11 @@ retry_pv_loop: ("pmap_remove_write: found a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); -retry: oldpte = *pte; if (oldpte & PG_RW) { - if (!atomic_cmpset_long(pte, oldpte, oldpte & + while (!atomic_fcmpset_long(pte, &oldpte, oldpte & ~(PG_RW | PG_M))) - goto retry; + cpu_spinwait(); if ((oldpte & PG_M) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 79b9d20231aa..bf476490b6be 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -3223,10 +3223,12 @@ pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask, * Return if the L2 entry already has the desired access restrictions * in place. */ -retry: if ((old_l2 & mask) == nbits) return; + while (!atomic_fcmpset_64(l2, &old_l2, (old_l2 & ~mask) | nbits)) + cpu_spinwait(); + /* * When a dirty read/write superpage mapping is write protected, * update the dirty field of each of the superpage's constituent 4KB @@ -3240,9 +3242,6 @@ retry: vm_page_dirty(mt); } - if (!atomic_fcmpset_64(l2, &old_l2, (old_l2 & ~mask) | nbits)) - goto retry; - /* * Since a promotion must break the 4KB page mappings before making * the 2MB page mapping, a pmap_invalidate_page() suffices. @@ -3334,7 +,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, sva += L3_SIZE) { l3 = pmap_load(l3p); -retry: + /* * Go to the next L3 entry if the current one is * invalid or already has the desired access @@ -3351,6 +3350,10 @@ retry: continue; } + while (!atomic_fcmpset_64(l3p, &l3, (l3 & ~mask) | + nbits)) + cpu_spinwait(); + /* * When a dirty read/write mapping is write protected, * update the page's dirty field. @@ -3360,8 +3363,6 @@ retry: pmap_pte_dirty(pmap, l3)) vm_page_dirt
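The difference between cmpset and fcmpset is that the latter hands back the value it found on failure, which is the same contract as C11 atomic_compare_exchange: the caller never re-reads the PTE, and bits that cannot change while the pmap lock is held need not be retested. A self-contained userspace analogue of the write-protect loop; the PG_RW/PG_M values are for illustration only:

#include <stdatomic.h>
#include <stdint.h>

#define PG_RW   0x002
#define PG_M    0x040

/* On failure, compare_exchange refreshes "oldpte" with the current
 * contents, so there is no separate reload and no repeated test. */
uint64_t
clear_rw_sketch(_Atomic uint64_t *pte)
{
        uint64_t oldpte;

        oldpte = atomic_load(pte);
        while ((oldpte & PG_RW) != 0 &&
            !atomic_compare_exchange_weak(pte, &oldpte,
            oldpte & ~(uint64_t)(PG_RW | PG_M)))
                ;       /* oldpte already holds the latest value; just retry */
        return (oldpte);        /* caller checks PG_M to dirty the page */
}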
git: 0add3c9945c8 - main - arm64: Simplify fcmpset failure in pmap_promote_l2()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=0add3c9945c85c7f766f9225866e99e2a805819b commit 0add3c9945c85c7f766f9225866e99e2a805819b Author: Alan Cox AuthorDate: 2021-07-07 18:16:03 + Commit: Alan Cox CommitDate: 2021-07-07 18:34:11 + arm64: Simplify fcmpset failure in pmap_promote_l2() When the initial fcmpset in pmap_promote_l2() fails, there is no need to repeat the check for the physical address being 2MB aligned or for the accessed bit being set. While the pmap is locked the hardware can only transition the accessed bit from 0 to 1, and we have already determined that it is 1 when the fcmpset fails. MFC after: 1 week --- sys/arm64/arm64/pmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index bf476490b6be..7758a84d81d5 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -3502,7 +3502,6 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, firstl3 = pmap_l2_to_l3(l2, sva); newl2 = pmap_load(firstl3); -setl2: if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF) { atomic_add_long(&pmap_l2_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" @@ -3510,6 +3509,7 @@ setl2: return; } +setl2: if ((newl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == (ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) { /* ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: d411b285bc29 - main - pmap: Micro-optimize pmap_remove_pages() on amd64 and arm64
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=d411b285bc293a37e062d8fb15b85212ce16abab commit d411b285bc293a37e062d8fb15b85212ce16abab Author: Alan Cox AuthorDate: 2021-07-12 23:25:37 + Commit: Alan Cox CommitDate: 2021-07-13 22:33:23 + pmap: Micro-optimize pmap_remove_pages() on amd64 and arm64 Reduce the live ranges for three variables so that they do not span the call to PHYS_TO_VM_PAGE(). This enables the compiler to generate slightly smaller machine code. Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D31161 --- sys/amd64/amd64/pmap.c | 14 ++ sys/arm64/arm64/pmap.c | 19 ++- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index f8bd17dc6238..31681e255af1 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -8201,6 +8201,16 @@ pmap_remove_pages(pmap_t pmap) continue; } + /* Mark free */ + pc->pc_map[field] |= bitmask; + + /* +* Because this pmap is not active on other +* processors, the dirty bit cannot have +* changed state since we last loaded pte. +*/ + pte_clear(pte); + if (superpage) pa = tpte & PG_PS_FRAME; else @@ -8217,8 +8227,6 @@ pmap_remove_pages(pmap_t pmap) ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); - pte_clear(pte); - /* * Update the vm_page_t clean/reference bits. */ @@ -8232,8 +8240,6 @@ pmap_remove_pages(pmap_t pmap) CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); - /* Mark free */ - pc->pc_map[field] |= bitmask; if (superpage) { pmap_resident_count_adj(pmap, -NBPDR / PAGE_SIZE); pvh = pa_to_pvh(tpte & PG_PS_FRAME); diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 7758a84d81d5..8ed1b86bd58c 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -4951,6 +4951,16 @@ pmap_remove_pages(pmap_t pmap) continue; } + /* Mark free */ + pc->pc_map[field] |= bitmask; + + /* +* Because this pmap is not active on other +* processors, the dirty bit cannot have +* changed state since we last loaded pte. +*/ + pmap_clear(pte); + pa = tpte & ~ATTR_MASK; m = PHYS_TO_VM_PAGE(pa); @@ -4964,13 +4974,6 @@ pmap_remove_pages(pmap_t pmap) ("pmap_remove_pages: bad pte %#jx", (uintmax_t)tpte)); - /* -* Because this pmap is not active on other -* processors, the dirty bit cannot have -* changed state since we last loaded pte. -*/ - pmap_clear(pte); - /* * Update the vm_page_t clean/reference bits. */ @@ -4988,8 +4991,6 @@ pmap_remove_pages(pmap_t pmap) CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); - /* Mark free */ - pc->pc_map[field] |= bitmask; switch (lvl) { case 1: pmap_resident_count_dec(pmap, ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: 325ff9327459 - main - Clear the accessed bit when copying a managed superpage mapping
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=325ff9327459bc7307130675fa19367ff8b02310 commit 325ff9327459bc7307130675fa19367ff8b02310 Author: Alan Cox AuthorDate: 2021-07-13 07:30:43 + Commit: Alan Cox CommitDate: 2021-07-14 18:06:10 + Clear the accessed bit when copying a managed superpage mapping pmap_copy() is used to speculatively create mappings, so those mappings should not have their access bit preset. Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D31162 --- sys/amd64/amd64/pmap.c | 17 - sys/arm64/arm64/pmap.c | 18 -- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 31681e255af1..427fbdf44830 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -7775,6 +7775,9 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, continue; if (srcptepaddr & PG_PS) { + /* +* We can only virtual copy whole superpages. +*/ if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr) continue; pde = pmap_alloc_pde(dst_pmap, addr, &dst_pdpg, NULL); @@ -7783,7 +7786,19 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 || pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr, PMAP_ENTER_NORECLAIM, &lock))) { - *pde = srcptepaddr & ~PG_W; + /* +* We leave the dirty bit unchanged because +* managed read/write superpage mappings are +* required to be dirty. However, managed +* superpage mappings are not required to +* have their accessed bit set, so we clear +* it because we don't know if this mapping +* will be used. +*/ + srcptepaddr &= ~PG_W; + if ((srcptepaddr & PG_MANAGED) != 0) + srcptepaddr &= ~PG_A; + *pde = srcptepaddr; pmap_resident_count_adj(dst_pmap, NBPDR / PAGE_SIZE); counter_u64_add(pmap_pde_mappings, 1); diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 8ed1b86bd58c..678feae55c25 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -4557,6 +4557,9 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, if (srcptepaddr == 0) continue; if ((srcptepaddr & ATTR_DESCR_MASK) == L2_BLOCK) { + /* +* We can only virtual copy whole superpages. +*/ if ((addr & L2_OFFSET) != 0 || addr + L2_SIZE > end_addr) continue; @@ -4567,8 +4570,19 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, ((srcptepaddr & ATTR_SW_MANAGED) == 0 || pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr, PMAP_ENTER_NORECLAIM, &lock))) { - mask = ATTR_SW_WIRED; - pmap_store(l2, srcptepaddr & ~mask); + /* +* We leave the dirty bit unchanged because +* managed read/write superpage mappings are +* required to be dirty. However, managed +* superpage mappings are not required to +* have their accessed bit set, so we clear +* it because we don't know if this mapping +* will be used. +*/ + srcptepaddr &= ~ATTR_SW_WIRED; + if ((srcptepaddr & ATTR_SW_MANAGED) != 0) + srcptepaddr &= ~ATTR_AF; + pmap_store(l2, srcptepaddr); pmap_resident_count_inc(dst_pmap, L2_SIZE /
git: 7fb152d22935 - main - arm64: Sync icache when creating executable superpage mappings
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=7fb152d22935e014afcad4ddc0b3a7e3c2795762 commit 7fb152d22935e014afcad4ddc0b3a7e3c2795762 Author: Alan Cox AuthorDate: 2021-07-14 17:59:49 + Commit: Alan Cox CommitDate: 2021-07-15 22:34:54 + arm64: Sync icache when creating executable superpage mappings Reviewed by:andrew, kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D31181 --- sys/arm64/arm64/pmap.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 190b56285e76..13941f4f61ea 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -4041,7 +4041,7 @@ pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if (pmap != kernel_pmap) new_l2 |= ATTR_S1_nG; return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP | - PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == + PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, m, lockp) == KERN_SUCCESS); } @@ -4071,8 +4071,6 @@ pmap_every_pte_zero(vm_paddr_t pa) * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table * page allocation failed. Returns KERN_RESOURCE_SHORTAGE if * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. - * - * The parameter "m" is only used when creating a managed, writeable mapping. */ static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, @@ -4159,6 +4157,16 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE; pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE; + /* +* Conditionally sync the icache. See pmap_enter() for details. +*/ + if ((new_l2 & ATTR_S1_XN) == 0 && ((new_l2 & ~ATTR_MASK) != + (old_l2 & ~ATTR_MASK) || (old_l2 & ATTR_S1_XN) != 0) && + pmap != kernel_pmap && m->md.pv_memattr == VM_MEMATTR_WRITE_BACK) { + cpu_icache_sync_range(PHYS_TO_DMAP(new_l2 & ~ATTR_MASK), + L2_SIZE); + } + /* * Map the superpage. */ ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: b7de53528836 - main - amd64: Eliminate a redundant test from pmap_enter_object()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=b7de535288362b072cf2801007e4d7e0e903d467 commit b7de535288362b072cf2801007e4d7e0e903d467 Author: Alan Cox AuthorDate: 2021-07-24 03:50:10 + Commit: Alan Cox CommitDate: 2021-07-24 04:15:42 + amd64: Eliminate a redundant test from pmap_enter_object() The call to pmap_allow_2m_x_page() in pmap_enter_object() is redundant. Specifically, even without the call to pmap_allow_2m_x_page() in pmap_enter_object(), pmap_allow_2m_x_page() is eventually called by pmap_enter_pde(), so the outcome will be the same. Essentially, calling pmap_allow_2m_x_page() in pmap_enter_object() amounts to "optimizing" for the unexpected case. Reviewed by:kib MFC after: 1 week --- sys/amd64/amd64/pmap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 427fbdf44830..688412594e6c 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -7334,7 +7334,6 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pmap_ps_enabled(pmap) && - pmap_allow_2m_x_page(pmap, (prot & VM_PROT_EXECUTE) != 0) && pmap_enter_2mpage(pmap, va, m, prot, &lock)) m = &m[NBPDR / PAGE_SIZE - 1]; else ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: 3687797618b6 - main - amd64: Don't repeat unnecessary tests when cmpset fails
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=3687797618b6c978ad733bd206a623e5df47dbe3 commit 3687797618b6c978ad733bd206a623e5df47dbe3 Author: Alan Cox AuthorDate: 2021-07-24 08:50:27 + Commit: Alan Cox CommitDate: 2021-07-24 18:06:47 + amd64: Don't repeat unnecessary tests when cmpset fails When a cmpset for removing the PG_RW bit in pmap_promote_pde() fails, there is no need to repeat the alignment, PG_A, and PG_V tests just to reload the PTE's value. The only bit that we need be concerned with at this point is PG_M. Use fcmpset instead. MFC after: 1 week --- sys/amd64/amd64/pmap.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index aea4394ebcc0..47315c560831 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -6615,7 +6615,6 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * within a 2MB page. */ firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); -setpde: newpde = *firstpte; if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) || !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, @@ -6625,12 +6624,13 @@ setpde: " in pmap %p", va, pmap); return; } +setpde: if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared without * a TLB invalidation. */ - if (!atomic_cmpset_long(firstpte, newpde, newpde & ~PG_RW)) + if (!atomic_fcmpset_long(firstpte, &newpde, newpde & ~PG_RW)) goto setpde; newpde &= ~PG_RW; } @@ -6642,7 +6642,6 @@ setpde: */ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { -setpte: oldpte = *pte; if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { counter_u64_add(pmap_pde_p_failures, 1); @@ -6650,12 +6649,13 @@ setpte: " in pmap %p", va, pmap); return; } +setpte: if ((oldpte & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared * without a TLB invalidation. */ - if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW)) + if (!atomic_fcmpset_long(pte, &oldpte, oldpte & ~PG_RW)) goto setpte; oldpte &= ~PG_RW; CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx" ___ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"
git: fc2e4f15a904 - main - iommu_gas: Eliminate unnecessary wrappers
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=fc2e4f15a9047bbf546cd675ed590b88e54362bd commit fc2e4f15a9047bbf546cd675ed590b88e54362bd Author: Alan Cox AuthorDate: 2022-06-14 19:01:36 + Commit: Alan Cox CommitDate: 2022-06-17 06:06:52 + iommu_gas: Eliminate unnecessary wrappers Eliminate trivial wrappers for several iommu_gas functions that serve no functional purpose. Reviewed by:br, dougm, kib MFC after: 3 weeks Differential Revision: https://reviews.freebsd.org/D35487 --- sys/dev/iommu/busdma_iommu.c | 8 sys/dev/iommu/iommu.h| 8 sys/dev/iommu/iommu_gas.c| 41 - 3 files changed, 4 insertions(+), 53 deletions(-) diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c index e06d96dad027..ae8c98922e41 100644 --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -594,7 +594,7 @@ iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag, if (seg + 1 < tag->common.nsegments) gas_flags |= IOMMU_MF_CANSPLIT; - error = iommu_map(domain, &tag->common, buflen1, + error = iommu_gas_map(domain, &tag->common, buflen1, offset, e_flags, gas_flags, ma + idx, &entry); if (error != 0) break; @@ -1046,7 +1046,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1, map = (struct bus_dmamap_iommu *)map1; waitok = (flags & BUS_DMA_NOWAIT) != 0; - entry = iommu_map_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK); + entry = iommu_gas_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK); if (entry == NULL) return (ENOMEM); entry->start = start; @@ -1054,14 +1054,14 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1, ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ? M_WAITOK : M_NOWAIT); if (ma == NULL) { - iommu_map_free_entry(domain, entry); + iommu_gas_free_entry(domain, entry); return (ENOMEM); } for (i = 0; i < atop(length); i++) { ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i, VM_MEMATTR_DEFAULT); } - error = iommu_map_region(domain, entry, IOMMU_MAP_ENTRY_READ | + error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ | ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE), waitok ? 
IOMMU_MF_CANWAIT : 0, ma); if (error == 0) { diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h index ee1149e6ea8f..3800213a1d64 100644 --- a/sys/dev/iommu/iommu.h +++ b/sys/dev/iommu/iommu.h @@ -162,14 +162,6 @@ struct iommu_ctx *iommu_instantiate_ctx(struct iommu_unit *iommu, device_t iommu_get_requester(device_t dev, uint16_t *rid); int iommu_init_busdma(struct iommu_unit *unit); void iommu_fini_busdma(struct iommu_unit *unit); -struct iommu_map_entry *iommu_map_alloc_entry(struct iommu_domain *iodom, -u_int flags); -void iommu_map_free_entry(struct iommu_domain *, struct iommu_map_entry *); -int iommu_map(struct iommu_domain *iodom, -const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset, -u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res); -int iommu_map_region(struct iommu_domain *domain, -struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma); void iommu_gas_init_domain(struct iommu_domain *domain); void iommu_gas_fini_domain(struct iommu_domain *domain); diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index 27954de9db39..a65bb23e87c5 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -799,36 +799,6 @@ iommu_gas_reserve_region_extend(struct iommu_domain *domain, return (error); } -struct iommu_map_entry * -iommu_map_alloc_entry(struct iommu_domain *domain, u_int flags) -{ - struct iommu_map_entry *res; - - res = iommu_gas_alloc_entry(domain, flags); - - return (res); -} - -void -iommu_map_free_entry(struct iommu_domain *domain, struct iommu_map_entry *entry) -{ - - iommu_gas_free_entry(domain, entry); -} - -int -iommu_map(struct iommu_domain *domain, -const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset, -u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res) -{ - int error; - - error = iommu_gas_map(domain, common, size, offset, eflags, flags, - ma, res); - - return (error); -} - void iommu_unmap_msi(struct iommu_ctx *ctx) { @@ -917,17 +887,6 @@ iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr) __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end)); } -int -iommu
git: e6775534aee1 - main - iommu_gas: Correct a broken KASSERT
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=e6775534aee1963a39e5ee762b8eab1d7dfb1b6b commit e6775534aee1963a39e5ee762b8eab1d7dfb1b6b Author: Alan Cox AuthorDate: 2022-06-17 17:03:06 + Commit: Alan Cox CommitDate: 2022-06-17 18:05:20 + iommu_gas: Correct a broken KASSERT If iommu_gas_find_space() ever called iommu_gas_uppermatch(), and it succeeded in allocating space, then the subsequent KASSERT would be triggered. Change that KASSERT to accept either success or ENOMEM. MFC after: 1 week --- sys/dev/iommu/iommu_gas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index a65bb23e87c5..073b5626edf6 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -504,7 +504,7 @@ iommu_gas_find_space(struct iommu_domain *domain, if (common->highaddr >= domain->end) return (ENOMEM); error = iommu_gas_uppermatch(&a, RB_ROOT(&domain->rb_root)); - KASSERT(error == ENOMEM, + KASSERT(error == 0 || error == ENOMEM, ("error %d from iommu_gas_uppermatch", error)); return (error); }
git: 164491fb0387 - main - iommu_gas: Eliminate a stale comment
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=164491fb038724405b9e38355b569ef8dd4ad068 commit 164491fb038724405b9e38355b569ef8dd4ad068 Author: Alan Cox AuthorDate: 2022-06-20 17:14:53 + Commit: Alan Cox CommitDate: 2022-06-20 17:30:36 + iommu_gas: Eliminate a stale comment As of 19bb5a7244ff, the size passed to iommu_gas_map is no longer required to be a multiple of the CPU page size. MFC after: 2 weeks --- sys/dev/iommu/iommu_gas.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index 073b5626edf6..f25519552d1c 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -358,8 +358,7 @@ iommu_gas_match_insert(struct iommu_gas_match_args *a) /* * The prev->end is always aligned on the page size, which -* causes page alignment for the entry->start too. The size -* is checked to be multiple of the page size. +* causes page alignment for the entry->start too. * * The page sized gap is created between consequent * allocations to ensure that out-of-bounds accesses fault.
git: 32e82bcc1527 - main - busdma_iommu: Eliminate a redundant trunc_page()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=32e82bcc152783dfe0d03ffdd864cedfddbc80d7 commit 32e82bcc152783dfe0d03ffdd864cedfddbc80d7 Author: Alan Cox AuthorDate: 2022-06-20 18:40:42 + Commit: Alan Cox CommitDate: 2022-06-21 04:03:04 + busdma_iommu: Eliminate a redundant trunc_page() Since OFF_TO_IDX() inherently truncates the given value, there is no need to perform trunc_page() on it. MFC after: 2 weeks --- sys/dev/iommu/busdma_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c index ae8c98922e41..42fc0b6c5451 100644 --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -634,7 +634,7 @@ iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag, segs[seg].ds_addr = entry->start + offset; segs[seg].ds_len = buflen1; - idx += OFF_TO_IDX(trunc_page(offset + buflen1)); + idx += OFF_TO_IDX(offset + buflen1); offset += buflen1; offset &= IOMMU_PAGE_MASK; buflen -= buflen1;
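The redundancy follows from the arithmetic: OFF_TO_IDX() is a right shift by PAGE_SHIFT, which already discards the sub-page bits that trunc_page() would clear. A compilable check using simplified stand-ins for the kernel macros:

#include <assert.h>
#include <stdint.h>

/* Simplified stand-ins for the kernel macros, for illustration only. */
#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_MASK       (PAGE_SIZE - 1)
#define trunc_page(x)   ((x) & ~(uint64_t)PAGE_MASK)
#define OFF_TO_IDX(x)   ((x) >> PAGE_SHIFT)

int
main(void)
{
        uint64_t off = 0x12345;

        /* The shift already discards the sub-page bits. */
        assert(OFF_TO_IDX(off) == OFF_TO_IDX(trunc_page(off)));
        return (0);
}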
git: 0ba1d8608234 - main - iommu_gas: Add a missing error-case unlock
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=0ba1d8608234eee767b475627da6e5903ce7536a commit 0ba1d8608234eee767b475627da6e5903ce7536a Author: Alan Cox AuthorDate: 2022-06-21 04:48:31 + Commit: Alan Cox CommitDate: 2022-06-21 04:48:31 + iommu_gas: Add a missing error-case unlock Release the domain lock when iommu_gas_reserve_region_extend()'s call to iommu_gas_reserve_region_locked() fails. MFC after: 2 weeks --- sys/dev/iommu/iommu_gas.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index 70eef9a0a1f7..a9c4caa30dd8 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -781,8 +781,10 @@ iommu_gas_reserve_region_extend(struct iommu_domain *domain, if (entry_start != entry_end) { error = iommu_gas_reserve_region_locked(domain, entry_start, entry_end, entry); - if (error != 0) + if (error != 0) { + IOMMU_DOMAIN_UNLOCK(domain); break; + } entry = NULL; } IOMMU_DOMAIN_UNLOCK(domain);
git: eeb46578c21a - main - busdma_iommu: Fine-grained locking for the dmamap's map list
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=eeb46578c21ad37866f49f3bbb3ac738b44abbf6 commit eeb46578c21ad37866f49f3bbb3ac738b44abbf6 Author: Alan Cox AuthorDate: 2022-06-22 21:51:47 + Commit: Alan Cox CommitDate: 2022-06-25 05:59:23 + busdma_iommu: Fine-grained locking for the dmamap's map list Introduce fine-grained locking on the dmamap's list of map entries, replacing the use of the domain lock. This is not the most significant source of lock contention, but it is the easiest to address. Reviewed by:kib MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D35557 --- sys/dev/iommu/busdma_iommu.c | 56 +--- sys/dev/iommu/busdma_iommu.h | 7 ++ 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c index 42fc0b6c5451..69cf9dd12e7e 100644 --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -458,6 +458,7 @@ iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) return (ENOMEM); } } + IOMMU_DMAMAP_INIT(map); TAILQ_INIT(&map->map_entries); map->tag = tag; map->locked = true; @@ -473,18 +474,16 @@ iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1) { struct bus_dma_tag_iommu *tag; struct bus_dmamap_iommu *map; - struct iommu_domain *domain; tag = (struct bus_dma_tag_iommu *)dmat; map = (struct bus_dmamap_iommu *)map1; if (map != NULL) { - domain = tag->ctx->domain; - IOMMU_DOMAIN_LOCK(domain); + IOMMU_DMAMAP_LOCK(map); if (!TAILQ_EMPTY(&map->map_entries)) { - IOMMU_DOMAIN_UNLOCK(domain); + IOMMU_DMAMAP_UNLOCK(map); return (EBUSY); } - IOMMU_DOMAIN_UNLOCK(domain); + IOMMU_DMAMAP_DESTROY(map); free(map, M_IOMMU_DMAMAP); } tag->map_count--; @@ -625,10 +624,11 @@ iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag, (uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz)); - IOMMU_DOMAIN_LOCK(domain); + KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0, + ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry)); + IOMMU_DMAMAP_LOCK(map); TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link); - entry->flags |= IOMMU_MAP_ENTRY_MAP; - IOMMU_DOMAIN_UNLOCK(domain); + IOMMU_DMAMAP_UNLOCK(map); TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link); segs[seg].ds_addr = entry->start + offset; @@ -651,8 +651,8 @@ iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag, { struct iommu_ctx *ctx; struct iommu_domain *domain; - struct iommu_map_entry *entry, *entry1; - struct iommu_map_entries_tailq unroll_list; + struct iommu_map_entry *entry; + struct iommu_map_entries_tailq entries, unroll_list; int error; ctx = tag->ctx; @@ -662,15 +662,15 @@ iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag, TAILQ_INIT(&unroll_list); error = iommu_bus_dmamap_load_something1(tag, map, ma, offset, buflen, flags, segs, segp, &unroll_list); - if (error != 0) { + if (error != 0 && !TAILQ_EMPTY(&unroll_list)) { /* * The busdma interface does not allow us to report * partial buffer load, so unfortunately we have to * revert all work done. */ - IOMMU_DOMAIN_LOCK(domain); - TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link, - entry1) { + TAILQ_INIT(&entries); + IOMMU_DMAMAP_LOCK(map); + TAILQ_FOREACH(entry, &unroll_list, unroll_link) { /* * No entries other than what we have created * during the failed run might have been @@ -678,10 +678,11 @@ iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag, * pglock. 
*/ TAILQ_REMOVE(&map->map_entries, entry, dmamap_link); - TAILQ_REMOVE(&unroll_list, entry, unroll_link); - TAILQ_INSERT_TAIL(&domain->unload_entries, entry, - dmamap_link); + TAILQ_INSERT_TAIL(&entries, entry, dmamap_lin
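The structural change is to give each dmamap its own lock and let that lock, rather than the domain lock, protect the map's entry list, so loads and unloads on different maps no longer contend. A minimal userspace sketch of the same arrangement; the names are invented and pthread mutexes stand in for the kernel mutexes:

#include <pthread.h>
#include <sys/queue.h>

struct sketch_entry {
        TAILQ_ENTRY(sketch_entry) link;
};

struct sketch_dmamap {
        pthread_mutex_t lock;                   /* per-map, not per-domain */
        TAILQ_HEAD(, sketch_entry) entries;     /* protected by "lock" */
};

void
sketch_dmamap_init(struct sketch_dmamap *map)
{
        pthread_mutex_init(&map->lock, NULL);
        TAILQ_INIT(&map->entries);
}

void
sketch_dmamap_insert(struct sketch_dmamap *map, struct sketch_entry *e)
{
        /* Concurrent loads on different maps take different locks. */
        pthread_mutex_lock(&map->lock);
        TAILQ_INSERT_TAIL(&map->entries, e, link);
        pthread_mutex_unlock(&map->lock);
}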
git: da33f6d76b9d - main - iommu_gas: Tidy up
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=da33f6d76b9d0c00649c692f6cb2b3b33bf5af30 commit da33f6d76b9d0c00649c692f6cb2b3b33bf5af30 Author: Alan Cox AuthorDate: 2022-06-26 05:44:47 + Commit: Alan Cox CommitDate: 2022-06-26 06:01:49 + iommu_gas: Tidy up Move a comment to the code that it describes. Improve the wording. Style fixes. MFC after: 2 weeks --- sys/dev/iommu/iommu_gas.c | 24 +++- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index a9c4caa30dd8..2647c2ce6612 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -197,8 +197,7 @@ iommu_gas_rb_insert(struct iommu_domain *domain, struct iommu_map_entry *entry) { struct iommu_map_entry *found; - found = RB_INSERT(iommu_gas_entries_tree, - &domain->rb_root, entry); + found = RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, entry); return (found == NULL); } @@ -303,6 +302,13 @@ iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg, { iommu_gaddr_t bs, start; + /* +* The prev->end is always aligned on the page size, which +* causes page alignment for the entry->start too. +* +* A page sized gap is created between consecutive +* allocations to ensure that out-of-bounds accesses fault. +*/ a->entry->start = roundup2(beg + IOMMU_PAGE_SIZE, a->common->alignment); if (a->entry->start + a->offset + a->size > maxaddr) @@ -356,13 +362,6 @@ iommu_gas_match_insert(struct iommu_gas_match_args *a) { bool found __diagused; - /* -* The prev->end is always aligned on the page size, which -* causes page alignment for the entry->start too. -* -* The page sized gap is created between consequent -* allocations to ensure that out-of-bounds accesses fault. -*/ a->entry->end = a->entry->start + roundup2(a->size + a->offset, IOMMU_PAGE_SIZE); @@ -487,8 +486,7 @@ iommu_gas_find_space(struct iommu_domain *domain, /* Handle lower region. */ if (common->lowaddr > 0) { - error = iommu_gas_lowermatch(&a, - RB_ROOT(&domain->rb_root)); + error = iommu_gas_lowermatch(&a, RB_ROOT(&domain->rb_root)); if (error == 0) return (0); KASSERT(error == ENOMEM, @@ -636,7 +634,7 @@ iommu_gas_map(struct iommu_domain *domain, ("invalid flags 0x%x", flags)); entry = iommu_gas_alloc_entry(domain, - (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0); + (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0); if (entry == NULL) return (ENOMEM); IOMMU_DOMAIN_LOCK(domain); @@ -660,7 +658,7 @@ iommu_gas_map(struct iommu_domain *domain, error = domain->ops->map(domain, entry->start, entry->end - entry->start, ma, eflags, - ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0)); + ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0)); if (error == ENOMEM) { iommu_domain_unload_entry(entry, true); return (error);
git: 7b39a9bc1df3 - main - iommu_gas: Fix a recent regression with IOMMU_MF_CANSPLIT
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=7b39a9bc1df37502e8186593f3427b7ff0e4cc71 commit 7b39a9bc1df37502e8186593f3427b7ff0e4cc71 Author: Alan Cox AuthorDate: 2022-06-26 16:48:12 + Commit: Alan Cox CommitDate: 2022-06-26 21:31:54 + iommu_gas: Fix a recent regression with IOMMU_MF_CANSPLIT As of 19bb5a7244ff, the IOMMU_MF_CANSPLIT case in iommu_gas_match_one() must take into account the specified offset. Otherwise, the recently changed end calculation in iommu_gas_match_insert() could produce an end address that crosses the specified boundary by one page. Reviewed by:dougm MFC with: 19bb5a7244ff --- sys/dev/iommu/iommu_gas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index 2647c2ce6612..bb6cde2721a6 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -350,7 +350,7 @@ iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg, * the next entry, then we do not have gap. Ignore for now. */ if ((a->gas_flags & IOMMU_MF_CANSPLIT) != 0) { - a->size = bs - a->entry->start; + a->size = bs - a->entry->start - a->offset; return (true); }
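A small arithmetic check shows why the offset must be subtracted: iommu_gas_match_insert() computes the end as start + roundup2(size + offset, IOMMU_PAGE_SIZE), so if the offset is not folded into the size the rounded end can land one page past the boundary. The numbers below are made up, and roundup2 is redefined locally for the example:

#include <assert.h>
#include <stdint.h>

#define IOMMU_PAGE_SIZE 4096
#define ROUNDUP2(x, y)  (((x) + ((y) - 1)) & ~((uint64_t)(y) - 1))

int
main(void)
{
        /* A free span ending at boundary "bs". */
        uint64_t start = 0x10000, offset = 0x800, bs = 0x20000;

        /* Corrected computation: subtract the offset as well. */
        uint64_t size = bs - start - offset;

        /* The end, computed as in iommu_gas_match_insert(), stays inside. */
        assert(start + ROUNDUP2(size + offset, IOMMU_PAGE_SIZE) <= bs);

        /* The old computation (size = bs - start) overshoots by one page. */
        assert(start + ROUNDUP2((bs - start) + offset, IOMMU_PAGE_SIZE) > bs);
        return (0);
}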
git: db0110a536bf - main - iommu: Shrink the iommu map entry structure
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=db0110a536bf70c1ff55f3b3f46a0b5a9af46058 commit db0110a536bf70c1ff55f3b3f46a0b5a9af46058 Author: Alan Cox AuthorDate: 2022-07-11 03:52:52 + Commit: Alan Cox CommitDate: 2022-07-16 03:24:52 + iommu: Shrink the iommu map entry structure Eliminate the unroll_entry field from struct iommu_map_entry, shrinking the struct by 16 bytes on 64-bit architectures. Reviewed by:kib MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D35769 --- sys/dev/iommu/busdma_iommu.c | 33 ++--- sys/dev/iommu/iommu.h| 2 -- sys/x86/iommu/intel_ctx.c| 4 ++-- sys/x86/iommu/intel_drv.c| 2 +- 4 files changed, 13 insertions(+), 28 deletions(-) diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c index 69cf9dd12e7e..10e7476b35eb 100644 --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -558,7 +558,7 @@ static int iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag, struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp, -struct iommu_map_entries_tailq *unroll_list) +struct iommu_map_entries_tailq *entries) { struct iommu_ctx *ctx; struct iommu_domain *domain; @@ -626,10 +626,7 @@ iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag, KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0, ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry)); - IOMMU_DMAMAP_LOCK(map); - TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link); - IOMMU_DMAMAP_UNLOCK(map); - TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link); + TAILQ_INSERT_TAIL(entries, entry, dmamap_link); segs[seg].ds_addr = entry->start + offset; segs[seg].ds_len = buflen1; @@ -651,36 +648,26 @@ iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag, { struct iommu_ctx *ctx; struct iommu_domain *domain; - struct iommu_map_entry *entry; - struct iommu_map_entries_tailq entries, unroll_list; + struct iommu_map_entries_tailq entries; int error; ctx = tag->ctx; domain = ctx->domain; atomic_add_long(&ctx->loads, 1); - TAILQ_INIT(&unroll_list); + TAILQ_INIT(&entries); error = iommu_bus_dmamap_load_something1(tag, map, ma, offset, - buflen, flags, segs, segp, &unroll_list); - if (error != 0 && !TAILQ_EMPTY(&unroll_list)) { + buflen, flags, segs, segp, &entries); + if (error == 0) { + IOMMU_DMAMAP_LOCK(map); + TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link); + IOMMU_DMAMAP_UNLOCK(map); + } else if (!TAILQ_EMPTY(&entries)) { /* * The busdma interface does not allow us to report * partial buffer load, so unfortunately we have to * revert all work done. */ - TAILQ_INIT(&entries); - IOMMU_DMAMAP_LOCK(map); - TAILQ_FOREACH(entry, &unroll_list, unroll_link) { - /* -* No entries other than what we have created -* during the failed run might have been -* inserted there in between, since we own ctx -* pglock. 
-*/ - TAILQ_REMOVE(&map->map_entries, entry, dmamap_link); - TAILQ_INSERT_TAIL(&entries, entry, dmamap_link); - } - IOMMU_DMAMAP_UNLOCK(map); IOMMU_DOMAIN_LOCK(domain); TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link); IOMMU_DOMAIN_UNLOCK(domain); diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h index 3800213a1d64..62b5659b6e83 100644 --- a/sys/dev/iommu/iommu.h +++ b/sys/dev/iommu/iommu.h @@ -58,8 +58,6 @@ struct iommu_map_entry { u_int flags; TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* Link for dmamap entries */ RB_ENTRY(iommu_map_entry) rb_entry; /* Links for domain entries */ - TAILQ_ENTRY(iommu_map_entry) unroll_link; /* Link for unroll after - dmamap_load failure */ struct iommu_domain *domain; struct iommu_qi_genseq gseq; }; diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c index 815dc6146b00..79e2a15d80c7 100644 --- a/sys/x86/iommu/intel_ctx.c +++ b/sys/x86/iommu/intel_ctx.c @@ -245,7 +245,7 @@ domain_init_rmrr(struct dmar_domain *domain, device_t dev,
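With the unroll_link gone, the load path collects newly created entries on a caller-private list and publishes them with a single TAILQ_CONCAT while the map lock is held, or unrolls them if the load fails. A self-contained sketch of that pattern with invented names (the sketch simply frees the private entries on failure, whereas the kernel hands them off for unloading):

#include <sys/queue.h>
#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

struct load_entry {
        TAILQ_ENTRY(load_entry) link;
};
TAILQ_HEAD(load_list, load_entry);

static struct load_list map_entries = TAILQ_HEAD_INITIALIZER(map_entries);
static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

int
load_some(int n)
{
        struct load_list local;
        struct load_entry *e;
        int i;

        TAILQ_INIT(&local);             /* private; no locking needed yet */
        for (i = 0; i < n; i++) {
                if ((e = malloc(sizeof(*e))) == NULL)
                        goto unroll;
                TAILQ_INSERT_TAIL(&local, e, link);
        }
        pthread_mutex_lock(&map_lock);
        TAILQ_CONCAT(&map_entries, &local, link);       /* one O(1) splice */
        pthread_mutex_unlock(&map_lock);
        return (0);
unroll:
        /* Nothing was published, so the private list can be torn down. */
        while ((e = TAILQ_FIRST(&local)) != NULL) {
                TAILQ_REMOVE(&local, e, link);
                free(e);
        }
        return (ENOMEM);
}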
git: da55f86c6146 - main - x86/iommu: Eliminate redundant wrappers
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=da55f86c61462b119fd1306d12411989d6610650 commit da55f86c61462b119fd1306d12411989d6610650 Author: Alan Cox AuthorDate: 2022-07-16 04:25:11 + Commit: Alan Cox CommitDate: 2022-07-16 23:05:37 + x86/iommu: Eliminate redundant wrappers Reviewed by:kib MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D35832 --- sys/x86/iommu/intel_ctx.c | 26 -- sys/x86/iommu/intel_dmar.h | 3 --- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c index 79e2a15d80c7..bfc607674b57 100644 --- a/sys/x86/iommu/intel_ctx.c +++ b/sys/x86/iommu/intel_ctx.c @@ -868,7 +868,7 @@ dmar_domain_free_entry(struct iommu_map_entry *entry, bool free) } void -dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free) +iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free) { struct dmar_domain *domain; struct dmar_unit *unit; @@ -902,15 +902,15 @@ dmar_domain_unload_emit_wait(struct dmar_domain *domain, } void -dmar_domain_unload(struct dmar_domain *domain, +iommu_domain_unload(struct iommu_domain *iodom, struct iommu_map_entries_tailq *entries, bool cansleep) { + struct dmar_domain *domain; struct dmar_unit *unit; - struct iommu_domain *iodom; struct iommu_map_entry *entry, *entry1; int error __diagused; - iodom = DOM2IODOM(domain); + domain = IODOM2DOM(iodom); unit = DOM2DMAR(domain); TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) { @@ -975,21 +975,3 @@ iommu_free_ctx(struct iommu_ctx *context) dmar_free_ctx(ctx); } - -void -iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free) -{ - - dmar_domain_unload_entry(entry, free); -} - -void -iommu_domain_unload(struct iommu_domain *iodom, -struct iommu_map_entries_tailq *entries, bool cansleep) -{ - struct dmar_domain *domain; - - domain = IODOM2DOM(iodom); - - dmar_domain_unload(domain, entries, cansleep); -} diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h index b34505a4e5d0..05793ed9f238 100644 --- a/sys/x86/iommu/intel_dmar.h +++ b/sys/x86/iommu/intel_dmar.h @@ -277,9 +277,6 @@ int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx); void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx); void dmar_free_ctx(struct dmar_ctx *ctx); struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid); -void dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free); -void dmar_domain_unload(struct dmar_domain *domain, -struct iommu_map_entries_tailq *entries, bool cansleep); void dmar_domain_free_entry(struct iommu_map_entry *entry, bool free); void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain,
git: 4eaaacc75535 - main - x86/iommu: Shrink the critical section in dmar_qi_task()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=4eaaacc75535befdb9894cca4e0d8da376328fa4 commit 4eaaacc75535befdb9894cca4e0d8da376328fa4 Author: Alan Cox AuthorDate: 2022-07-18 00:56:39 + Commit: Alan Cox CommitDate: 2022-07-19 03:23:13 + x86/iommu: Shrink the critical section in dmar_qi_task() It is safe to test and clear the Invalidation Wait Descriptor Complete flag before acquiring the DMAR lock in dmar_qi_task(), rather than waiting until the lock is held. Reviewed by:kib MFC after: 2 weeks --- sys/x86/iommu/intel_qi.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c index 894e3d537ac7..ca58715a227c 100644 --- a/sys/x86/iommu/intel_qi.c +++ b/sys/x86/iommu/intel_qi.c @@ -343,6 +343,16 @@ dmar_qi_task(void *arg, int pending __unused) unit = arg; + /* +* Request an interrupt on the completion of the next invalidation +* wait descriptor with the IF field set. +*/ + ics = dmar_read4(unit, DMAR_ICS_REG); + if ((ics & DMAR_ICS_IWC) != 0) { + ics = DMAR_ICS_IWC; + dmar_write4(unit, DMAR_ICS_REG, ics); + } + DMAR_LOCK(unit); for (;;) { entry = TAILQ_FIRST(&unit->tlb_flush_entries); @@ -356,11 +366,6 @@ dmar_qi_task(void *arg, int pending __unused) IOMMU_MAP_ENTRY_QI_NF) == 0); DMAR_LOCK(unit); } - ics = dmar_read4(unit, DMAR_ICS_REG); - if ((ics & DMAR_ICS_IWC) != 0) { - ics = DMAR_ICS_IWC; - dmar_write4(unit, DMAR_ICS_REG, ics); - } if (unit->inv_seq_waiters > 0) wakeup(&unit->inv_seq_waiters); DMAR_UNLOCK(unit);
git: 54291f7d6506 - main - swap_pager: Reduce the scope of the object lock in putpages
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=54291f7d6506e6c6087433c5bbdb2224b6cef23b commit 54291f7d6506e6c6087433c5bbdb2224b6cef23b Author: Alan Cox AuthorDate: 2022-07-19 03:28:07 + Commit: Alan Cox CommitDate: 2022-07-19 03:35:49 + swap_pager: Reduce the scope of the object lock in putpages We don't need to hold the object lock while allocating swap space, so don't. Reviewed by:dougm, kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D35839 --- sys/vm/swap_pager.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index c20360975c4b..67cc3bf017d2 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1506,10 +1506,8 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, } /* Get a block of swap of size up to size n. */ - VM_OBJECT_WLOCK(object); blk = swp_pager_getswapspace(&n); if (blk == SWAPBLK_NONE) { - VM_OBJECT_WUNLOCK(object); mtx_lock(&swbuf_mtx); if (++nsw_wcount_async == 1) wakeup(&nsw_wcount_async); @@ -1518,6 +1516,7 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, rtvals[i + j] = VM_PAGER_FAIL; continue; } + VM_OBJECT_WLOCK(object); for (j = 0; j < n; ++j) { mreq = ma[i + j]; vm_page_aflag_clear(mreq, PGA_SWAP_FREE);
git: dfabdacb279c - main - iommu_gas: Avoid double unmapping on error
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=dfabdacb279ca603d008a0e7e952c5c59ac51da4 commit dfabdacb279ca603d008a0e7e952c5c59ac51da4 Author: Alan Cox AuthorDate: 2022-07-21 06:53:54 + Commit: Alan Cox CommitDate: 2022-07-21 07:00:46 + iommu_gas: Avoid double unmapping on error In the extremely unlikely case that the iommu_gas_map_region() call in bus_dma_iommu_load_ident() failed, we would attempt to unmap the failed entry twice, first in iommu_gas_map_region(), and a second time in the caller. Once is enough, and twice is problematic because it leads to a second RB_REMOVE call on the same tree node. Like it or not, RB_TREE does not handle that possibility. Reviewed by:kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D35869 --- sys/dev/iommu/busdma_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c index 10e7476b35eb..67e82fe43e58 100644 --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -1055,7 +1055,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1, TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link); IOMMU_DMAMAP_UNLOCK(map); } else { - iommu_domain_unload_entry(entry, true); + iommu_gas_free_entry(domain, entry); } for (i = 0; i < atop(length); i++) vm_page_putfake(ma[i]);
git: 8bc367384745 - main - iommu_gas: Eliminate a possible case of use-after-free
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=8bc3673847453ca51237b5c85fe57f3f02e17a4b commit 8bc3673847453ca51237b5c85fe57f3f02e17a4b Author: Alan Cox AuthorDate: 2022-07-22 17:00:26 + Commit: Alan Cox CommitDate: 2022-07-25 16:14:58 + iommu_gas: Eliminate a possible case of use-after-free Eliminate a possible case of use-after-free in an error handling path after a mapping failure. Specifically, eliminate IOMMU_MAP_ENTRY_QI_NF and instead perform the IOTLB invalidation synchronously. Otherwise, when iommu_domain_unload_entry() is called and told not to free the IOMMU map entry, the caller could free the entry before dmar_qi_task() is finished with it. Reviewed by:kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D35878 --- sys/arm64/iommu/iommu.c| 3 ++- sys/dev/iommu/iommu.h | 3 ++- sys/dev/iommu/iommu_gas.c | 6 -- sys/dev/iommu/iommu_gas.h | 1 - sys/x86/iommu/intel_ctx.c | 28 +++- sys/x86/iommu/intel_dmar.h | 2 ++ sys/x86/iommu/intel_qi.c | 14 -- 7 files changed, 41 insertions(+), 16 deletions(-) diff --git a/sys/arm64/iommu/iommu.c b/sys/arm64/iommu/iommu.c index aa48dcf5ab5e..0080ab4ff316 100644 --- a/sys/arm64/iommu/iommu.c +++ b/sys/arm64/iommu/iommu.c @@ -509,7 +509,8 @@ iommu_find(device_t dev, bool verbose) } void -iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free) +iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, +bool cansleep __unused) { dprintf("%s\n", __func__); diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h index 62b5659b6e83..65fefe3ada7b 100644 --- a/sys/dev/iommu/iommu.h +++ b/sys/dev/iommu/iommu.h @@ -151,7 +151,8 @@ void iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *ctx); struct iommu_ctx *iommu_get_ctx(struct iommu_unit *, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init); struct iommu_unit *iommu_find(device_t dev, bool verbose); -void iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free); +void iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, +bool cansleep); void iommu_domain_unload(struct iommu_domain *domain, struct iommu_map_entries_tailq *entries, bool cansleep); diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index 86dc919e4572..ec456e2ec48b 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -638,7 +638,8 @@ iommu_gas_map(struct iommu_domain *domain, entry->end - entry->start, ma, eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0)); if (error == ENOMEM) { - iommu_domain_unload_entry(entry, true); + iommu_domain_unload_entry(entry, true, + (flags & IOMMU_MF_CANWAIT) != 0); return (error); } KASSERT(error == 0, @@ -676,7 +677,8 @@ iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry, entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start), eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? 
IOMMU_PGF_WAITOK : 0)); if (error == ENOMEM) { - iommu_domain_unload_entry(entry, false); + iommu_domain_unload_entry(entry, false, + (flags & IOMMU_MF_CANWAIT) != 0); return (error); } KASSERT(error == 0, diff --git a/sys/dev/iommu/iommu_gas.h b/sys/dev/iommu/iommu_gas.h index c32a098538b0..a9d0df5f272f 100644 --- a/sys/dev/iommu/iommu_gas.h +++ b/sys/dev/iommu/iommu_gas.h @@ -50,7 +50,6 @@ #defineIOMMU_MAP_ENTRY_MAP 0x0004 /* Busdma created, linked by dmamap_link */ #defineIOMMU_MAP_ENTRY_UNMAPPED0x0010 /* No backing pages */ -#defineIOMMU_MAP_ENTRY_QI_NF 0x0020 /* qi task, do not free entry */ #defineIOMMU_MAP_ENTRY_READ0x1000 /* Read permitted */ #defineIOMMU_MAP_ENTRY_WRITE 0x2000 /* Write permitted */ #defineIOMMU_MAP_ENTRY_SNOOP 0x4000 /* Snoop */ diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c index bfc607674b57..5e13f020264b 100644 --- a/sys/x86/iommu/intel_ctx.c +++ b/sys/x86/iommu/intel_ctx.c @@ -868,25 +868,35 @@ dmar_domain_free_entry(struct iommu_map_entry *entry, bool free) } void -iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free) +iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, +bool cansleep) { struct dmar_domain *domain; struct dmar_unit *unit; domain = IODOM2DOM(entry->domain); unit = DOM2DMAR(domain); + + /* +* If "free" is false, then the IOTLB invalidation must be performed +* synchronously. Otherwise, the caller mi
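A compressed sketch of the resulting logic in iommu_domain_unload_entry(); this paraphrases the diff above (and is further corrected in the next commit), so take the locking details as illustrative rather than authoritative.

	if (unit->qi_enabled) {
		if (free) {
			/*
			 * Queue the invalidation; dmar_qi_task() will free
			 * the entry once the invalidation has completed.
			 */
			dmar_qi_invalidate_locked(domain, entry->start,
			    entry->end - entry->start, &entry->gseq, true);
			TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry,
			    dmamap_link);
		} else {
			/*
			 * The caller keeps ownership of the entry, so wait
			 * for the invalidation to complete before returning;
			 * otherwise dmar_qi_task() could still be using the
			 * entry after the caller has freed or reused it.
			 */
			dmar_qi_invalidate_sync_locked(domain, entry->start,
			    entry->end - entry->start, cansleep);
		}
	}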
git: c25156347083 - main - x86/iommu: Correct a recent change to iommu_domain_unload_entry()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=c251563470831c34cf53242936425a0d4d995edf commit c251563470831c34cf53242936425a0d4d995edf Author: Alan Cox AuthorDate: 2022-07-26 04:53:15 + Commit: Alan Cox CommitDate: 2022-07-26 06:07:21 + x86/iommu: Correct a recent change to iommu_domain_unload_entry() Correct 8bc367384745. When iommu_domain_unload_entry() performs a synchronous IOTLB invalidation, it must call dmar_domain_free_entry() to remove the entry from the domain's RB_TREE. Push down the acquisition and release of the DMAR lock into the recently introduced function dmar_qi_invalidate_sync_locked() and remove the _locked suffix. MFC with: 8bc367384745 --- sys/x86/iommu/intel_ctx.c | 7 --- sys/x86/iommu/intel_dmar.h | 4 ++-- sys/x86/iommu/intel_qi.c | 9 ++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c index 5e13f020264b..936cf8bb7632 100644 --- a/sys/x86/iommu/intel_ctx.c +++ b/sys/x86/iommu/intel_ctx.c @@ -883,17 +883,18 @@ iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, * dmar_qi_task() is finished processing it. */ if (unit->qi_enabled) { - DMAR_LOCK(unit); if (free) { + DMAR_LOCK(unit); dmar_qi_invalidate_locked(domain, entry->start, entry->end - entry->start, &entry->gseq, true); TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link); + DMAR_UNLOCK(unit); } else { - dmar_qi_invalidate_sync_locked(domain, entry->start, + dmar_qi_invalidate_sync(domain, entry->start, entry->end - entry->start, cansleep); + dmar_domain_free_entry(entry, false); } - DMAR_UNLOCK(unit); } else { domain_flush_iotlb_sync(domain, entry->start, entry->end - entry->start); diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h index 0f811d760bb7..06cecdf704ff 100644 --- a/sys/x86/iommu/intel_dmar.h +++ b/sys/x86/iommu/intel_dmar.h @@ -251,8 +251,8 @@ int dmar_init_qi(struct dmar_unit *unit); void dmar_fini_qi(struct dmar_unit *unit); void dmar_qi_invalidate_locked(struct dmar_domain *domain, iommu_gaddr_t start, iommu_gaddr_t size, struct iommu_qi_genseq *psec, bool emit_wait); -void dmar_qi_invalidate_sync_locked(struct dmar_domain *domain, -iommu_gaddr_t start, iommu_gaddr_t size, bool cansleep); +void dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t start, +iommu_gaddr_t size, bool cansleep); void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit); void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit); void dmar_qi_invalidate_iec_glob(struct dmar_unit *unit); diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c index 174cf9ea19a8..32f01a2787b0 100644 --- a/sys/x86/iommu/intel_qi.c +++ b/sys/x86/iommu/intel_qi.c @@ -243,14 +243,17 @@ dmar_qi_invalidate_locked(struct dmar_domain *domain, iommu_gaddr_t base, } void -dmar_qi_invalidate_sync_locked(struct dmar_domain *domain, iommu_gaddr_t base, +dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t base, iommu_gaddr_t size, bool cansleep) { + struct dmar_unit *unit; struct iommu_qi_genseq gseq; - DMAR_ASSERT_LOCKED(domain->dmar); + unit = domain->dmar; + DMAR_LOCK(unit); dmar_qi_invalidate_locked(domain, base, size, &gseq, true); - dmar_qi_wait_for_seq(domain->dmar, &gseq, !cansleep); + dmar_qi_wait_for_seq(unit, &gseq, !cansleep); + DMAR_UNLOCK(unit); } void
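After this correction, a caller performing a synchronous unload no longer touches the DMAR lock itself; roughly, paraphrasing the else branch in the diff above:

	/* Synchronous path: invalidate, then drop the entry from the tree. */
	dmar_qi_invalidate_sync(domain, entry->start,
	    entry->end - entry->start, cansleep);
	dmar_domain_free_entry(entry, false);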
git: 42736dc44dd0 - main - x86/iommu: Reduce DMAR lock contention
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=42736dc44dd0151546db3f2e145ae1cfd4546fe1 commit 42736dc44dd0151546db3f2e145ae1cfd4546fe1 Author: Alan Cox AuthorDate: 2022-07-26 06:04:54 + Commit: Alan Cox CommitDate: 2022-07-29 05:11:33 + x86/iommu: Reduce DMAR lock contention Replace the DMAR unit's tlb_flush TAILQ by a custom list implementation that enables dmar_qi_task() to dequeue entries without holding the DMAR lock. Reviewed by:kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D35951 --- sys/dev/iommu/iommu.h | 5 +- sys/dev/iommu/iommu_gas.c | 5 +- sys/x86/iommu/intel_ctx.c | 16 +++ sys/x86/iommu/intel_dmar.h | 33 +++-- sys/x86/iommu/intel_qi.c | 113 ++--- 5 files changed, 140 insertions(+), 32 deletions(-) diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h index 65fefe3ada7b..fefd0f615be5 100644 --- a/sys/dev/iommu/iommu.h +++ b/sys/dev/iommu/iommu.h @@ -56,7 +56,10 @@ struct iommu_map_entry { iommu_gaddr_t free_down;/* Max free space below the current R/B tree node */ u_int flags; - TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* Link for dmamap entries */ + union { + TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* DMA map entries */ + struct iommu_map_entry *tlb_flush_next; + }; RB_ENTRY(iommu_map_entry) rb_entry; /* Links for domain entries */ struct iommu_domain *domain; struct iommu_qi_genseq gseq; diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index ec456e2ec48b..bac15edcf849 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -99,7 +99,7 @@ iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags) res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) != 0 ? M_WAITOK : M_NOWAIT) | M_ZERO); - if (res != NULL) { + if (res != NULL && domain != NULL) { res->domain = domain; atomic_add_int(&domain->entries_cnt, 1); } @@ -113,7 +113,8 @@ iommu_gas_free_entry(struct iommu_domain *domain, struct iommu_map_entry *entry) KASSERT(domain == entry->domain, ("mismatched free domain %p entry %p entry->domain %p", domain, entry, entry->domain)); - atomic_subtract_int(&domain->entries_cnt, 1); + if (domain != NULL) + atomic_subtract_int(&domain->entries_cnt, 1); uma_zfree(iommu_map_entry_zone, entry); } diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c index 936cf8bb7632..3bd425aeecbd 100644 --- a/sys/x86/iommu/intel_ctx.c +++ b/sys/x86/iommu/intel_ctx.c @@ -867,6 +867,10 @@ dmar_domain_free_entry(struct iommu_map_entry *entry, bool free) entry->flags = 0; } +/* + * If the given value for "free" is true, then the caller must not be using + * the entry's dmamap_link field. 
+ */ void iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, bool cansleep) @@ -885,10 +889,7 @@ iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, if (unit->qi_enabled) { if (free) { DMAR_LOCK(unit); - dmar_qi_invalidate_locked(domain, entry->start, - entry->end - entry->start, &entry->gseq, true); - TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, - dmamap_link); + dmar_qi_invalidate_locked(domain, entry, true); DMAR_UNLOCK(unit); } else { dmar_qi_invalidate_sync(domain, entry->start, @@ -942,12 +943,11 @@ iommu_domain_unload(struct iommu_domain *iodom, KASSERT(unit->qi_enabled, ("loaded entry left")); DMAR_LOCK(unit); - TAILQ_FOREACH(entry, entries, dmamap_link) { - dmar_qi_invalidate_locked(domain, entry->start, entry->end - - entry->start, &entry->gseq, + while ((entry = TAILQ_FIRST(entries)) != NULL) { + TAILQ_REMOVE(entries, entry, dmamap_link); + dmar_qi_invalidate_locked(domain, entry, dmar_domain_unload_emit_wait(domain, entry)); } - TAILQ_CONCAT(&unit->tlb_flush_entries, entries, dmamap_link); DMAR_UNLOCK(unit); } diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h index 06cecdf704ff..1234ee058ffd 100644 --- a/sys/x86/iommu/intel_dmar.h +++ b/sys/x86/iommu/intel_dmar.h @@ -177,8 +177,33 @@ struct dmar_unit { u_int irte_cnt; vmem_t *irtids; - /* Delayed freeing of
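The diff is cut off above, but the consumer side of the new lock-free list is visible in the later commit 7f46deccbed7. A simplified sketch of it, assuming the unit keeps a dummy head entry (tlb_flush_head) so that producer and consumer never race on the same pointer field:

	for (head = unit->tlb_flush_head;; head = entry) {
		entry = (struct iommu_map_entry *)
		    atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
		if (entry == NULL ||
		    !dmar_qi_seq_processed(unit, &entry->gseq))
			break;		/* nothing further has completed */
		unit->tlb_flush_head = entry;	/* entry is the new dummy head */
		iommu_gas_free_entry(head);
		if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
			iommu_gas_free_region(entry);
		else
			iommu_gas_free_space(entry);
	}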
git: 4670f90846d4 - main - iommu_gas: Eliminate redundant parameters and push down lock acquisition
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=4670f90846d49027bf23435a30895a74264f1e79 commit 4670f90846d49027bf23435a30895a74264f1e79 Author: Alan Cox AuthorDate: 2022-07-29 06:14:46 + Commit: Alan Cox CommitDate: 2022-07-30 19:28:48 + iommu_gas: Eliminate redundant parameters and push down lock acquisition Since IOMMU map entries store a reference to the domain in which they reside, there is no need to pass the domain to iommu_gas_free_entry(), iommu_gas_free_space(), and iommu_gas_free_region(). Push down the acquisition and release of the IOMMU domain lock into iommu_gas_free_space() and iommu_gas_free_region(). Both of these changes allow for simplifications in the callers of the functions without really complicating the functions themselves. Moreover, the latter change eliminates the direct use of the IOMMU domain lock from the x86-specific DMAR code. Reviewed by:kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D35995 --- sys/arm64/iommu/iommu.c | 10 ++ sys/dev/iommu/busdma_iommu.c | 4 ++-- sys/dev/iommu/iommu.h| 9 +++-- sys/dev/iommu/iommu_gas.c| 44 +--- sys/x86/iommu/intel_ctx.c| 13 - sys/x86/iommu/intel_qi.c | 10 +++--- 6 files changed, 39 insertions(+), 51 deletions(-) diff --git a/sys/arm64/iommu/iommu.c b/sys/arm64/iommu/iommu.c index 0080ab4ff316..d24cad94e966 100644 --- a/sys/arm64/iommu/iommu.c +++ b/sys/arm64/iommu/iommu.c @@ -410,16 +410,10 @@ iommu_free_ctx(struct iommu_ctx *ioctx) static void iommu_domain_free_entry(struct iommu_map_entry *entry, bool free) { - struct iommu_domain *iodom; - - iodom = entry->domain; - - IOMMU_DOMAIN_LOCK(iodom); - iommu_gas_free_space(iodom, entry); - IOMMU_DOMAIN_UNLOCK(iodom); + iommu_gas_free_space(entry); if (free) - iommu_gas_free_entry(iodom, entry); + iommu_gas_free_entry(entry); else entry->flags = 0; } diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c index 67e82fe43e58..8f63d8b47f19 100644 --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -1040,7 +1040,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1, ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ? 
M_WAITOK : M_NOWAIT); if (ma == NULL) { - iommu_gas_free_entry(domain, entry); + iommu_gas_free_entry(entry); return (ENOMEM); } for (i = 0; i < atop(length); i++) { @@ -1055,7 +1055,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1, TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link); IOMMU_DMAMAP_UNLOCK(map); } else { - iommu_gas_free_entry(domain, entry); + iommu_gas_free_entry(entry); } for (i = 0; i < atop(length); i++) vm_page_putfake(ma[i]); diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h index fefd0f615be5..ae4022c5c4f7 100644 --- a/sys/dev/iommu/iommu.h +++ b/sys/dev/iommu/iommu.h @@ -169,15 +169,12 @@ void iommu_gas_init_domain(struct iommu_domain *domain); void iommu_gas_fini_domain(struct iommu_domain *domain); struct iommu_map_entry *iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags); -void iommu_gas_free_entry(struct iommu_domain *domain, -struct iommu_map_entry *entry); -void iommu_gas_free_space(struct iommu_domain *domain, -struct iommu_map_entry *entry); +void iommu_gas_free_entry(struct iommu_map_entry *entry); +void iommu_gas_free_space(struct iommu_map_entry *entry); int iommu_gas_map(struct iommu_domain *domain, const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset, u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res); -void iommu_gas_free_region(struct iommu_domain *domain, -struct iommu_map_entry *entry); +void iommu_gas_free_region(struct iommu_map_entry *entry); int iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma); int iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start, diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index bac15edcf849..bad56ab9140e 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -107,12 +107,11 @@ iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags) } void -iommu_gas_free_entry(struct iommu_domain *domain, struct iommu_map_entry *entry) +iommu_gas_free_entry(struct iommu_map_entry *entry) { + struct iommu_domain *domain; - KASSERT(domain == entry->domain, - ("mismatched free domain %p entry %
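The diff is truncated just as the new iommu_gas_free_entry() begins; its body, reconstructed from the pieces shown above (the domain now comes from the entry itself and may be NULL), is approximately:

void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
	struct iommu_domain *domain;

	domain = entry->domain;
	if (domain != NULL)
		atomic_subtract_int(&domain->entries_cnt, 1);
	uma_zfree(iommu_map_entry_zone, entry);
}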
git: 7f46deccbed7 - main - x86/iommu: Reduce the number of queued invalidation interrupts
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=7f46deccbed74436b62f8fd02655ff4ad89f1023 commit 7f46deccbed74436b62f8fd02655ff4ad89f1023 Author: Alan Cox AuthorDate: 2022-07-31 19:28:30 + Commit: Alan Cox CommitDate: 2022-08-06 18:05:58 + x86/iommu: Reduce the number of queued invalidation interrupts Restructure dmar_qi_task() so as to reduce the number of invalidation completion interrupts. Specifically, because processing completed invalidations in dmar_qi_task() can take quite some time, don't reenable completion interrupts until processing has completed a first time. Then, check a second time after reenabling completion interrupts, so that any invalidations that complete just before interrupts are reenabled do not linger until a future invalidation might raise an interrupt. (Recent changes have made checking for completed invalidations cheap; no locking is required.) Reviewed by:kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D36054 --- sys/x86/iommu/intel_qi.c | 45 + 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c index baaf5b472a2c..8a8e656083e3 100644 --- a/sys/x86/iommu/intel_qi.c +++ b/sys/x86/iommu/intel_qi.c @@ -411,14 +411,34 @@ dmar_qi_intr(void *arg) return (FILTER_HANDLED); } +static void +dmar_qi_drain_tlb_flush(struct dmar_unit *unit) +{ + struct iommu_map_entry *entry, *head; + + for (head = unit->tlb_flush_head;; head = entry) { + entry = (struct iommu_map_entry *) + atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next); + if (entry == NULL || + !dmar_qi_seq_processed(unit, &entry->gseq)) + break; + unit->tlb_flush_head = entry; + iommu_gas_free_entry(head); + if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0) + iommu_gas_free_region(entry); + else + iommu_gas_free_space(entry); + } +} + static void dmar_qi_task(void *arg, int pending __unused) { struct dmar_unit *unit; - struct iommu_map_entry *entry, *head; uint32_t ics; unit = arg; + dmar_qi_drain_tlb_flush(unit); /* * Request an interrupt on the completion of the next invalidation @@ -428,23 +448,16 @@ dmar_qi_task(void *arg, int pending __unused) if ((ics & DMAR_ICS_IWC) != 0) { ics = DMAR_ICS_IWC; dmar_write4(unit, DMAR_ICS_REG, ics); - } - for (;;) { - head = unit->tlb_flush_head; - entry = (struct iommu_map_entry *) - atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next); - if (entry == NULL) - break; - if (!dmar_qi_seq_processed(unit, &entry->gseq)) - break; - unit->tlb_flush_head = entry; - iommu_gas_free_entry(head); - if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0) - iommu_gas_free_region(entry); - else - iommu_gas_free_space(entry); + /* +* Drain a second time in case the DMAR processes an entry +* after the first call and before clearing DMAR_ICS_IWC. +* Otherwise, such entries will linger until a later entry +* that requests an interrupt is processed. +*/ + dmar_qi_drain_tlb_flush(unit); } + if (unit->inv_seq_waiters > 0) { /* * Acquire the DMAR lock so that wakeup() is called only after
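Condensed, the re-arm pattern in dmar_qi_task() looks like this (an abridged sketch of the diff above, not the complete task function):

	dmar_qi_drain_tlb_flush(unit);		/* first pass, interrupt masked */
	ics = dmar_read4(unit, DMAR_ICS_REG);
	if ((ics & DMAR_ICS_IWC) != 0) {
		dmar_write4(unit, DMAR_ICS_REG, DMAR_ICS_IWC);	/* re-arm */
		/*
		 * An invalidation that completed between the first pass and
		 * the write above raises no new interrupt, so drain again
		 * now that completion interrupts are enabled.
		 */
		dmar_qi_drain_tlb_flush(unit);	/* second pass */
	}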
git: 34eeabff5a86 - main - amd64/arm64 pmap: Stop requiring the accessed bit for superpage promotion
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=34eeabff5a8636155bb02985c5928c1844fd3178 commit 34eeabff5a8636155bb02985c5928c1844fd3178 Author: Alan Cox AuthorDate: 2023-05-31 23:10:41 + Commit: Alan Cox CommitDate: 2023-06-12 18:40:57 + amd64/arm64 pmap: Stop requiring the accessed bit for superpage promotion Stop requiring all of the PTEs to have the accessed bit set for superpage promotion to occur. Given that change, add support for promotion to pmap_enter_quick(), which does not set the accessed bit in the PTE that it creates. Since the final mapping within a superpage-aligned and sized region of a memory-mapped file is typically created by a call to pmap_enter_quick(), we now achieve promotions in circumstances where they did not occur before, for example, the X server's read-only mapping of libLLVM-15.so. See also https://www.usenix.org/system/files/atc20-zhu-weixi_0.pdf Reviewed by:kib, markj MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D40478 --- sys/amd64/amd64/pmap.c | 154 +++ sys/amd64/include/pmap.h | 2 +- sys/arm64/arm64/pmap.c | 122 +++-- 3 files changed, 205 insertions(+), 73 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 123811ed573f..3cb02a4f9daa 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -313,6 +313,33 @@ pmap_pku_mask_bit(pmap_t pmap) return (pmap->pm_type == PT_X86 ? X86_PG_PKU_MASK : 0); } +static __inline boolean_t +safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) +{ + + if (!pmap_emulate_ad_bits(pmap)) + return (TRUE); + + KASSERT(pmap->pm_type == PT_EPT, ("invalid pm_type %d", pmap->pm_type)); + + /* +* XWR = 010 or 110 will cause an unconditional EPT misconfiguration +* so we don't let the referenced (aka EPT_PG_READ) bit to be cleared +* if the EPT_PG_WRITE bit is set. +*/ + if ((pte & EPT_PG_WRITE) != 0) + return (FALSE); + + /* +* XWR = 100 is allowed only if the PMAP_SUPPORTS_EXEC_ONLY is set. +*/ + if ((pte & EPT_PG_EXECUTE) == 0 || + ((pmap->pm_flags & PMAP_SUPPORTS_EXEC_ONLY) != 0)) + return (TRUE); + else + return (FALSE); +} + #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE__attribute__((__gnu_inline__)) inline @@ -1279,7 +1306,8 @@ static intpmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); -static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted); +static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted, +bool allpte_PG_A_set); static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva); static void pmap_invalidate_cache_range_all(vm_offset_t sva, @@ -2491,7 +2519,7 @@ pmap_init(void) */ if ((i == 0 || kernphys + ((vm_paddr_t)(i - 1) << PDRSHIFT) < KERNend) && - pmap_insert_pt_page(kernel_pmap, mpte, false)) + pmap_insert_pt_page(kernel_pmap, mpte, false, false)) panic("pmap_init: pmap_insert_pt_page failed"); } PMAP_UNLOCK(kernel_pmap); @@ -4061,14 +4089,26 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. * - * If "promoted" is false, then the page table page "mpte" must be zero filled. 
+ * If "promoted" is false, then the page table page "mpte" must be zero filled; + * "mpte"'s valid field will be set to 0. + * + * If "promoted" is true and "allpte_PG_A_set" is false, then "mpte" must + * contain valid mappings with identical attributes except for PG_A; "mpte"'s + * valid field will be set to 1. + * + * If "promoted" and "allpte_PG_A_set" are both true, then "mpte" must contain + * valid mappings with identical attributes including PG_A; "mpte"'s valid + * field will be set to VM_PAGE_BITS_ALL. */ static __inline int -pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted) +pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted, +bool allpte_PG_A_set) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mpte->valid = promoted ? VM_PAG
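The assignment is cut off above; an expression consistent with the three cases described in the new comment (and with the truncated "mpte->valid = promoted ? VM_PAG...") would be:

	/*
	 * 0                 - "mpte" is zero filled (not promoted)
	 * 1                 - promoted, but not every PTE had PG_A set
	 * VM_PAGE_BITS_ALL  - promoted and every PTE had PG_A set
	 */
	mpte->valid = promoted ? (allpte_PG_A_set ? VM_PAGE_BITS_ALL : 1) : 0;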
git: 58d427172157 - main - vm_phys: Fix typo in 9e8174289236
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=58d427172157dedf82e46014e7d19cf973186dd9 commit 58d427172157dedf82e46014e7d19cf973186dd9 Author: Alan Cox AuthorDate: 2023-06-16 08:12:42 + Commit: Alan Cox CommitDate: 2023-06-16 08:12:42 + vm_phys: Fix typo in 9e8174289236 --- sys/vm/vm_phys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c index a0b53f0f7c4b..28f12231e01c 100644 --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -1246,7 +1246,7 @@ vm_phys_find_range(vm_page_t bounds[], int segind, int domain, struct vm_phys_seg *end_seg, *seg; KASSERT(npages > 0, ("npages is zero")); - KASSERT(domain >= 0 && domain < vm_ndomain, ("domain out of range")); + KASSERT(domain >= 0 && domain < vm_ndomains, ("domain out of range")); end_seg = &vm_phys_segs[vm_phys_nsegs]; for (seg = &vm_phys_segs[segind]; seg < end_seg; seg++) { if (seg->domain != domain)
git: 0d2f98c2f092 - main - amd64 pmap: Tidy up pmap_promote_pde() calls
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=0d2f98c2f0928a8ee8446c3e5e0e4fb93f1dd9c5 commit 0d2f98c2f0928a8ee8446c3e5e0e4fb93f1dd9c5 Author: Alan Cox AuthorDate: 2023-06-17 17:18:33 + Commit: Alan Cox CommitDate: 2023-06-24 18:09:04 + amd64 pmap: Tidy up pmap_promote_pde() calls Since pmap_ps_enabled() is true by default, check it inside of pmap_promote_pde() instead of at every call site. Modify pmap_promote_pde() to return true if the promotion succeeded and false otherwise. Use this return value in a couple places. Reviewed by:kib, markj Differential Revision: https://reviews.freebsd.org/D40744 --- sys/amd64/amd64/pmap.c | 36 ++-- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 3cb02a4f9daa..3215a7f8d559 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -436,7 +436,7 @@ pt_entry_t pg_nx; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "VM/pmap parameters"); -static int pg_ps_enabled = 1; +static int __read_frequently pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pg_ps_enabled, 0, "Are large page mappings enabled?"); @@ -1318,7 +1318,7 @@ static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static vm_page_t pmap_large_map_getptp_unlocked(void); static vm_paddr_t pmap_large_map_kextract(vm_offset_t va); #if VM_NRESERVLEVEL > 0 -static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, +static bool pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, struct rwlock **lockp); #endif static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, @@ -6856,7 +6856,7 @@ pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde) * aligned, contiguous physical memory and (2) the 4KB page mappings must have * identical characteristics. */ -static void +static bool pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, struct rwlock **lockp) { @@ -6865,6 +6865,10 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, pt_entry_t allpte_PG_A, PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V; int PG_PTE_CACHE; + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + if (!pmap_ps_enabled(pmap)) + return (false); + PG_A = pmap_accessed_bit(pmap); PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); @@ -6873,8 +6877,6 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, PG_PKU_MASK = pmap_pku_mask_bit(pmap); PG_PTE_CACHE = pmap_cache_mask(pmap, 0); - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - /* * Examine the first PTE in the specified PTP. 
Abort if this PTE is * ineligible for promotion due to hardware errata, invalid, or does @@ -6883,12 +6885,12 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); newpde = *firstpte; if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, newpde))) - return; + return (false); if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) { counter_u64_add(pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); - return; + return (false); } /* @@ -6933,7 +6935,7 @@ setpde: counter_u64_add(pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); - return; + return (false); } setpte: if ((oldpte & (PG_M | PG_RW)) == PG_RW) { @@ -6952,7 +6954,7 @@ setpte: counter_u64_add(pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); - return; + return (false); } allpte_PG_A &= oldpte; pa -= PAGE_SIZE; @@ -6993,7 +6995,7 @@ setpte: CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx in pmap %p", va, pmap); - return; + return (false); } /* @@ -7018,6 +7020,7 @@ setpte: counter_u64_add(pmap_pde_promotions, 1); CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#l
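A hypothetical caller after this change; the surrounding conditions are omitted, and the point is only that the promotion attempt is now a single call whose result can be tested:

	/* pmap_promote_pde() now checks pmap_ps_enabled() itself. */
	if (pmap_promote_pde(pmap, pde, va, mpte, &lock)) {
		/* The 512 4KB mappings in this PTP became one 2MB mapping. */
	}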
git: d8e6f4946cec - main - vm: Fix anonymous memory clustering under ASLR
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=d8e6f4946cec0b84a6997d62e791b8cf993741b2 commit d8e6f4946cec0b84a6997d62e791b8cf993741b2 Author: Alan Cox AuthorDate: 2023-06-23 17:00:32 + Commit: Alan Cox CommitDate: 2023-06-27 04:42:48 + vm: Fix anonymous memory clustering under ASLR By default, our ASLR implementation is supposed to cluster anonymous memory allocations, unless the application's mmap(..., MAP_ANON, ...) call included a non-zero address hint. Unfortunately, clustering never occurred because kern_mmap() always replaced the given address hint when it was zero. So, the ASLR implementation always believed that a non-zero hint had been provided and randomized the mapping's location in the address space. To fix this problem, I'm pushing down the point at which we convert a hint of zero to the minimum allocatable address from kern_mmap() to vm_map_find_min(). Reviewed by:kib MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D40743 --- sys/vm/vm_map.c | 10 +++--- sys/vm/vm_map.h | 1 + sys/vm/vm_mmap.c | 8 +--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index f5863a9b9939..a02107b5e64d 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1981,14 +1981,14 @@ SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW, "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always"); static bool -clustering_anon_allowed(vm_offset_t addr) +clustering_anon_allowed(vm_offset_t addr, int cow) { switch (cluster_anon) { case 0: return (false); case 1: - return (addr == 0); + return (addr == 0 || (cow & MAP_NO_HINT) != 0); case 2: default: return (true); @@ -2111,7 +2111,7 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, } else alignment = 0; en_aslr = (map->flags & MAP_ASLR) != 0; - update_anon = cluster = clustering_anon_allowed(*addr) && + update_anon = cluster = clustering_anon_allowed(*addr, cow) && (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 && find_space != VMFS_NO_SPACE && object == NULL && (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | @@ -2255,6 +2255,10 @@ vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, int rv; hint = *addr; + if (hint == 0) + cow |= MAP_NO_HINT; + if (hint < min_addr) + *addr = hint = min_addr; for (;;) { rv = vm_map_find(map, object, offset, addr, length, max_addr, find_space, prot, max, cow); diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 2ac54a39a57b..fd8b606e8ddc 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -383,6 +383,7 @@ long vmspace_resident_count(struct vmspace *vmspace); #defineMAP_CREATE_STACK_GAP_DN 0x0002 #defineMAP_VN_EXEC 0x0004 #defineMAP_SPLIT_BOUNDARY_MASK 0x0018 +#defineMAP_NO_HINT 0x0020 #defineMAP_SPLIT_BOUNDARY_SHIFT 19 diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 56345fcaf560..408e077476dd 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -353,10 +353,12 @@ kern_mmap(struct thread *td, const struct mmap_req *mrp) * the hint would fall in the potential heap space, * place it after the end of the largest possible heap. * -* There should really be a pmap call to determine a reasonable -* location. +* For anonymous mappings within the address space of the +* calling process, the absence of a hint is handled at a +* lower level in order to implement different clustering +* strategies for ASLR. 
*/ - if (addr == 0 || + if (((flags & MAP_ANON) == 0 && addr == 0) || (addr >= round_page((vm_offset_t)vms->vm_taddr) && addr < round_page((vm_offset_t)vms->vm_daddr + lim_max(td, RLIMIT_DATA
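The decision that restores clustering is small; the following is lifted, lightly condensed, from the diff above. A hint of zero is still rewritten to a default address before vm_map_find() runs, but the MAP_NO_HINT flag records that the application did not actually supply one:

static bool
clustering_anon_allowed(vm_offset_t addr, int cow)
{
	switch (cluster_anon) {
	case 0:
		return (false);
	case 1:
		/* Cluster when no hint was given, explicitly or implicitly. */
		return (addr == 0 || (cow & MAP_NO_HINT) != 0);
	case 2:
	default:
		return (true);
	}
}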
git: 3767de839742 - main - arm64 pmap: Tidy up pmap_promote_l2() calls
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=3767de83974206e4267dabf7fbe66b151c1a0b14 commit 3767de83974206e4267dabf7fbe66b151c1a0b14 Author: Alan Cox AuthorDate: 2023-06-28 07:08:02 + Commit: Alan Cox CommitDate: 2023-06-28 17:46:15 + arm64 pmap: Tidy up pmap_promote_l2() calls Since pmap_ps_enabled() is true by default, check it inside of pmap_promote_l2() instead of at every call site. Modify pmap_promote_l2() to return true if the promotion succeeded and false otherwise. (A similar change was applied to the amd64 pmap in 0d2f98c2f092.) Reviewed by:kib, markj Differential Revision: https://reviews.freebsd.org/D40781 --- sys/arm64/arm64/pmap.c | 41 ++--- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 3166b3d7959b..46520889728f 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -435,7 +435,7 @@ void (*pmap_stage2_invalidate_all)(uint64_t); #defineTLBI_VA(addr) (((addr) >> TLBI_VA_SHIFT) & TLBI_VA_MASK) #defineTLBI_VA_L3_INCR (L3_SIZE >> TLBI_VA_SHIFT) -static int superpages_enabled = 1; +static int __read_frequently superpages_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0, "Are large page mappings enabled?"); @@ -4141,14 +4141,21 @@ pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, * aligned, contiguous physical memory and (2) the 4KB page mappings must have * identical characteristics. */ -static void +static bool pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte, struct rwlock **lockp) { pt_entry_t all_l3e_AF, *firstl3, *l3, newl2, oldl3, pa; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - PMAP_ASSERT_STAGE1(pmap); + + /* +* Currently, this function only supports promotion on stage 1 pmaps +* because it tests stage 1 specific fields and performs a break- +* before-make sequence that is incorrect for stage 2 pmaps. +*/ + if (pmap->pm_stage != PM_STAGE1 || !pmap_ps_enabled(pmap)) + return (false); /* * Examine the first L3E in the specified PTP. Abort if this L3E is @@ -4157,14 +4164,14 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte, firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2))); newl2 = pmap_load(firstl3); if ((newl2 & ATTR_SW_NO_PROMOTE) != 0) - return; + return (false); /* ... is not the first physical page within an L2 block */ if ((PTE_TO_PHYS(newl2) & L2_OFFSET) != 0 || ((newl2 & ATTR_DESCR_MASK) != L3_PAGE)) { /* ... 
or is invalid */ atomic_add_long(&pmap_l2_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" " in pmap %p", va, pmap); - return; + return (false); } /* @@ -4212,7 +4219,7 @@ setl2: atomic_add_long(&pmap_l2_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" " in pmap %p", va, pmap); - return; + return (false); } setl3: if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == @@ -4232,7 +4239,7 @@ setl3: atomic_add_long(&pmap_l2_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" " in pmap %p", va, pmap); - return; + return (false); } all_l3e_AF &= oldl3; pa -= PAGE_SIZE; @@ -4263,7 +4270,7 @@ setl3: CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx in pmap %p", va, pmap); - return; + return (false); } if ((newl2 & ATTR_SW_MANAGED) != 0) @@ -4277,6 +4284,7 @@ setl3: atomic_add_long(&pmap_l2_promotions, 1); CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va, pmap); + return (true); } #endif /* VM_NRESERVLEVEL > 0 */ @@ -4681,17 +4689,13 @@ validate: #if VM_NRESERVLEVEL > 0 /* -* Try to promote from level 3 pages to a level 2 superpage. This -* currently only works on stage 1 pmaps as pmap_promote_l2 looks at -* stage 1 specific fields and performs a break-before-make sequence -* that is incorrect a stage 2 pmap. +
git: e59d202312f9 - main - arm64: make VM_NFREEORDER and the comment describing it match
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=e59d202312f9868583c6603030ded2476085920d commit e59d202312f9868583c6603030ded2476085920d Author: Alan Cox AuthorDate: 2023-06-28 08:23:09 + Commit: Alan Cox CommitDate: 2023-06-29 17:48:48 + arm64: make VM_NFREEORDER and the comment describing it match The setting of VM_NFREEORDER and the comment describing it were copied from sparc64 where both the page size and the number of page table entries that fit in a cache line are different from arm64. Reviewed by:andrew, kib, markj MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D40782 --- sys/arm64/include/vmparam.h | 15 --- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h index b28a79256453..23b7d0d87c94 100644 --- a/sys/arm64/include/vmparam.h +++ b/sys/arm64/include/vmparam.h @@ -89,14 +89,15 @@ #defineVM_FREELIST_DEFAULT 0 /* - * An allocation size of 16MB is supported in order to optimize the - * use of the direct map by UMA. Specifically, a cache line contains - * at most four TTEs, collectively mapping 16MB of physical memory. - * By reducing the number of distinct 16MB "pages" that are used by UMA, - * the physical memory allocator reduces the likelihood of both 4MB - * page TLB misses and cache misses caused by 4MB page TLB misses. + * When PAGE_SIZE is 4KB, an allocation size of 16MB is supported in order + * to optimize the use of the direct map by UMA. Specifically, a 64-byte + * cache line contains at most 8 L2 BLOCK entries, collectively mapping 16MB + * of physical memory. By reducing the number of distinct 16MB "pages" that + * are used by UMA, the physical memory allocator reduces the likelihood of + * both 2MB page TLB misses and cache misses during the page table walk when + * a 2MB page TLB miss does occur. */ -#defineVM_NFREEORDER 12 +#defineVM_NFREEORDER 13 /* * Enable superpage reservations: 1 level.
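The arithmetic behind the new value, written out as a standalone check; this snippet only illustrates the numbers in the comment and is not part of the commit:

/* 13 orders (0..12): the largest buddy allocation spans 16MB with 4KB pages. */
_Static_assert((4096UL << (13 - 1)) == 16UL * 1024 * 1024,
    "order-12 chunk covers 16MB");
/* One 64-byte cache line of 8-byte L2 BLOCK entries maps 8 * 2MB = 16MB. */
_Static_assert((64 / 8) * (2UL * 1024 * 1024) == 16UL * 1024 * 1024,
    "a cache line of L2 blocks maps 16MB");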
git: 294c52d969df - main - amd64 pmap: Fix compilation when superpage reservations are disabled
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=294c52d969dfdaf1d9b3f4a1de76b702ee724afc commit 294c52d969dfdaf1d9b3f4a1de76b702ee724afc Author: Yufeng Zhou AuthorDate: 2023-07-12 07:52:02 + Commit: Alan Cox CommitDate: 2023-07-12 17:07:42 + amd64 pmap: Fix compilation when superpage reservations are disabled The function pmap_pde_ept_executable() should not be conditionally compiled based on VM_NRESERVLEVEL. It is required indirectly by pmap_enter(..., psind=1) even when reservation-based allocation is disabled at compile time. Reviewed by:alc MFC after: 1 week --- sys/amd64/amd64/pmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 3215a7f8d559..896078f3c456 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -6839,7 +6839,6 @@ retry: PMAP_UNLOCK(pmap); } -#if VM_NRESERVLEVEL > 0 static bool pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde) { @@ -6849,6 +6848,7 @@ pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde) return ((pde & EPT_PG_EXECUTE) != 0); } +#if VM_NRESERVLEVEL > 0 /* * Tries to promote the 512, contiguous 4KB page mappings that are within a * single page table page (PTP) to a single 2MB page mapping. For promotion
git: 29edff0dea0f - main - arm64/riscv pmap: Initialize the pmap's pm_pvchunk field
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=29edff0dea0f7a2df710dd649d0cbcd4a2da3692 commit 29edff0dea0f7a2df710dd649d0cbcd4a2da3692 Author: Alan Cox AuthorDate: 2023-07-16 20:58:04 + Commit: Alan Cox CommitDate: 2023-07-22 04:58:18 + arm64/riscv pmap: Initialize the pmap's pm_pvchunk field I believe that there are two reasons that the missing TAILQ initialization operations haven't caused a problem. First, the TAILQ head's first field is being initialized to zeroes elsewhere. Second, the first access to the TAILQ head's last field is by TAILQ_INSERT_HEAD(), which assigns to the last field without reading it when the first field is NULL. Reviewed by:kib, markj MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D41118 --- sys/arm64/arm64/pmap.c | 3 +++ sys/riscv/riscv/pmap.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index c2681104c961..b2591437b3b3 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -1291,6 +1291,7 @@ pmap_bootstrap(vm_paddr_t kernstart, vm_size_t kernlen) PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_l0_paddr = pmap_early_vtophys((vm_offset_t)kernel_pmap_store.pm_l0); + TAILQ_INIT(&kernel_pmap->pm_pvchunk); vm_radix_init(&kernel_pmap->pm_root); kernel_pmap->pm_cookie = COOKIE_FROM(-1, INT_MIN); kernel_pmap->pm_stage = PM_STAGE1; @@ -2270,6 +2271,7 @@ pmap_pinit0(pmap_t pmap) bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l0_paddr = READ_SPECIALREG(ttbr0_el1); pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr); + TAILQ_INIT(&pmap->pm_pvchunk); vm_radix_init(&pmap->pm_root); pmap->pm_cookie = COOKIE_FROM(ASID_RESERVED_FOR_PID_0, INT_MIN); pmap->pm_stage = PM_STAGE1; @@ -2293,6 +2295,7 @@ pmap_pinit_stage(pmap_t pmap, enum pmap_stage stage, int levels) pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(m); pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr); + TAILQ_INIT(&pmap->pm_pvchunk); vm_radix_init(&pmap->pm_root); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX); diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 7580f091ad86..3732eea14f7d 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -646,6 +646,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_top = (pd_entry_t *)l1pt; PMAP_LOCK_INIT(kernel_pmap); + TAILQ_INIT(&kernel_pmap->pm_pvchunk); vm_radix_init(&kernel_pmap->pm_root); rw_init(&pvh_global_lock, "pmap pv global"); @@ -1327,6 +1328,7 @@ pmap_pinit0(pmap_t pmap) pmap->pm_satp = pmap_satp_mode() | (vtophys(pmap->pm_top) >> PAGE_SHIFT); CPU_ZERO(&pmap->pm_active); + TAILQ_INIT(&pmap->pm_pvchunk); vm_radix_init(&pmap->pm_root); pmap_activate_boot(pmap); } @@ -1369,6 +1371,7 @@ pmap_pinit(pmap_t pmap) pmap->pm_top[i] = kernel_pmap->pm_top[i]; } + TAILQ_INIT(&pmap->pm_pvchunk); vm_radix_init(&pmap->pm_root); return (1);
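The second point can be seen directly in the macro; here is TAILQ_INSERT_HEAD() abridged from sys/queue.h (debug instrumentation omitted). When the first field is NULL, tqh_last is only written, never read:

#define	TAILQ_INSERT_HEAD(head, elm, field) do {			\
	if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL)	\
		TAILQ_FIRST((head))->field.tqe_prev =			\
		    &TAILQ_NEXT((elm), field);				\
	else								\
		(head)->tqh_last = &TAILQ_NEXT((elm), field);		\
	TAILQ_FIRST((head)) = (elm);					\
	(elm)->field.tqe_prev = &TAILQ_FIRST((head));			\
} while (0)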
git: 0aebcfc9f4d6 - main - arm64 pmap: Eliminate some duplication of code
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=0aebcfc9f4d642a8bef95504dc928fab78af33bf commit 0aebcfc9f4d642a8bef95504dc928fab78af33bf Author: Alan Cox AuthorDate: 2023-07-22 17:41:49 + Commit: Alan Cox CommitDate: 2023-07-23 05:34:17 + arm64 pmap: Eliminate some duplication of code pmap_unmapbios() can simply call pmap_kremove_device() rather than duplicating its code. While I'm here, add a comment to pmap_kremove_device() explaining its proper use, and fix a whitespace issue. MFC after: 1 week --- sys/arm64/arm64/pmap.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index b2591437b3b3..dfed0142f273 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -2032,6 +2032,13 @@ pmap_kremove(vm_offset_t va) pmap_s1_invalidate_page(kernel_pmap, va, true); } +/* + * Remove the specified range of mappings from the kernel address space. + * + * Should only be applied to mappings that were created by pmap_kenter() or + * pmap_kenter_device(). Nothing about this function is actually specific + * to device mappings. + */ void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { @@ -2039,7 +2046,7 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size) vm_offset_t va; KASSERT((sva & L3_OFFSET) == 0, - ("pmap_kremove_device: Invalid virtual address")); + ("pmap_kremove_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kremove_device: Mapping is not page-sized")); @@ -6550,7 +6557,7 @@ void pmap_unmapbios(void *p, vm_size_t size) { struct pmap_preinit_mapping *ppim; - vm_offset_t offset, tmpsize, va, va_trunc; + vm_offset_t offset, va, va_trunc; pd_entry_t *pde; pt_entry_t *l2; int i, lvl, l2_blocks, block; @@ -6600,14 +6607,8 @@ pmap_unmapbios(void *p, vm_size_t size) size = round_page(offset + size); va = trunc_page(va); - pde = pmap_pde(kernel_pmap, va, &lvl); - KASSERT(pde != NULL, - ("pmap_unmapbios: Invalid page entry, va: 0x%lx", va)); - KASSERT(lvl == 2, ("pmap_unmapbios: Invalid level %d", lvl)); - /* Unmap and invalidate the pages */ -for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) - pmap_kremove(va + tmpsize); + pmap_kremove_device(va, size); kva_free(va, size); }
git: 7b1e606c7222 - main - arm64 pmap: Retire PMAP_INLINE
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=7b1e606c7acdaea613924f566ffe9b65c068 commit 7b1e606c7acdaea613924f566ffe9b65c068 Author: Alan Cox AuthorDate: 2023-07-22 17:55:43 + Commit: Alan Cox CommitDate: 2023-07-23 05:34:17 + arm64 pmap: Retire PMAP_INLINE Neither of the remaining callers to pmap_kremove() warrant inlining. Those calls rarely occur. In other words, we were optimizing for the uncommon case. MFC after: 1 week --- sys/arm64/arm64/pmap.c | 12 +--- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index dfed0142f273..379296f375ae 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -170,16 +170,6 @@ __FBSDID("$FreeBSD$"); #defineNUL1E (NUL0E * NL1PG) #defineNUL2E (NUL1E * NL2PG) -#if !defined(DIAGNOSTIC) -#ifdef __GNUC_GNU_INLINE__ -#define PMAP_INLINE__attribute__((__gnu_inline__)) inline -#else -#define PMAP_INLINEextern inline -#endif -#else -#define PMAP_INLINE -#endif - #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #define __pvused @@ -2022,7 +2012,7 @@ pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) /* * Remove a page from the kernel pagetables. */ -PMAP_INLINE void +void pmap_kremove(vm_offset_t va) { pt_entry_t *pte;
git: 50d663b14b31 - main - vm: Fix vm_map_find_min()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=50d663b14b310d6020b4b6cc92d4fae985f086f2 commit 50d663b14b310d6020b4b6cc92d4fae985f086f2 Author: Alan Cox AuthorDate: 2023-07-25 07:24:19 + Commit: Alan Cox CommitDate: 2023-07-26 05:24:50 + vm: Fix vm_map_find_min() Fix the handling of address hints that are less than min_addr by vm_map_find_min(). Reported by:dchagin Reviewed by:kib Fixes: d8e6f4946cec0 "vm: Fix anonymous memory clustering under ASLR" Differential Revision: https://reviews.freebsd.org/D41159 --- sys/vm/vm_map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 444e09986d4e..eb607d519247 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -2255,10 +2255,10 @@ vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, int rv; hint = *addr; - if (hint == 0) + if (hint == 0) { cow |= MAP_NO_HINT; - if (hint < min_addr) *addr = hint = min_addr; + } for (;;) { rv = vm_map_find(map, object, offset, addr, length, max_addr, find_space, prot, max, cow);
git: a98a0090b2ba - main - arm64 pmap: Eliminate unnecessary TLB invalidations
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=a98a0090b2ba64ff0bc3cf71a00fb5f9e31fc1d3 commit a98a0090b2ba64ff0bc3cf71a00fb5f9e31fc1d3 Author: Alan Cox AuthorDate: 2023-07-23 07:11:43 + Commit: Alan Cox CommitDate: 2023-07-26 05:37:13 + arm64 pmap: Eliminate unnecessary TLB invalidations Eliminate unnecessary TLB invalidations by pmap_kenter(), pmap_qenter(), and pmap_mapbios() when the old page table entries were invalid. While I'm here, correct some nearby whitespace issues. MFC after: 2 weeks --- sys/arm64/arm64/pmap.c | 49 ++--- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 379296f375ae..fa09d2026550 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -1972,19 +1972,20 @@ void pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode) { pd_entry_t *pde; - pt_entry_t *pte, attr; + pt_entry_t attr, old_l3e, *pte; vm_offset_t va; int lvl; KASSERT((pa & L3_OFFSET) == 0, - ("pmap_kenter: Invalid physical address")); + ("pmap_kenter: Invalid physical address")); KASSERT((sva & L3_OFFSET) == 0, - ("pmap_kenter: Invalid virtual address")); + ("pmap_kenter: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kenter: Mapping is not page-sized")); attr = ATTR_DEFAULT | ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN | ATTR_S1_IDX(mode) | L3_PAGE; + old_l3e = 0; va = sva; while (size != 0) { pde = pmap_pde(kernel_pmap, va, &lvl); @@ -1993,13 +1994,21 @@ pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode) KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl)); pte = pmap_l2_to_l3(pde, va); - pmap_load_store(pte, PHYS_TO_PTE(pa) | attr); + old_l3e |= pmap_load_store(pte, PHYS_TO_PTE(pa) | attr); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } - pmap_s1_invalidate_range(kernel_pmap, sva, va, true); + if ((old_l3e & ATTR_DESCR_VALID) != 0) + pmap_s1_invalidate_range(kernel_pmap, sva, va, true); + else { + /* +* Because the old entries were invalid and the new mappings +* are not executable, an isb is not required. +*/ + dsb(ishst); + } } void @@ -2082,11 +2091,12 @@ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pd_entry_t *pde; - pt_entry_t *pte, pa, attr; + pt_entry_t attr, old_l3e, pa, *pte; vm_offset_t va; vm_page_t m; int i, lvl; + old_l3e = 0; va = sva; for (i = 0; i < count; i++) { pde = pmap_pde(kernel_pmap, va, &lvl); @@ -2100,11 +2110,19 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) attr = ATTR_DEFAULT | ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN | ATTR_S1_IDX(m->md.pv_memattr) | L3_PAGE; pte = pmap_l2_to_l3(pde, va); - pmap_load_store(pte, PHYS_TO_PTE(pa) | attr); + old_l3e |= pmap_load_store(pte, PHYS_TO_PTE(pa) | attr); va += L3_SIZE; } - pmap_s1_invalidate_range(kernel_pmap, sva, va, true); + if ((old_l3e & ATTR_DESCR_VALID) != 0) + pmap_s1_invalidate_range(kernel_pmap, sva, va, true); + else { + /* +* Because the old entries were invalid and the new mappings +* are not executable, an isb is not required. 
+*/ + dsb(ishst); + } } /* @@ -6441,7 +6459,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size) { struct pmap_preinit_mapping *ppim; vm_offset_t va, offset; - pd_entry_t *pde; + pd_entry_t old_l2e, *pde; pt_entry_t *l2; int i, lvl, l2_blocks, free_l2_count, start_idx; @@ -6501,6 +6519,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size) /* Map L2 blocks */ pa = rounddown2(pa, L2_SIZE); + old_l2e = 0; for (i = 0; i < l2_blocks; i++) { pde = pmap_pde(kernel_pmap, va, &lvl); KASSERT(pde != NULL, @@ -6511,14 +6530,22 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size) /* Insert L2_BLOCK */ l2 = pmap_l1_to_l2(pde, va); - pmap_load_store(l2, + old_l2e |= pmap_load_store(l2, PHYS_TO_PTE(pa) | ATTR_DEFAULT | ATTR_S1_XN |
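All three functions follow the same pattern, which the flattened diff makes hard to see. Condensed into a sketch, reusing the locals from pmap_kenter() above:

	old_l3e = 0;
	for (va = sva; size != 0; va += PAGE_SIZE, pa += PAGE_SIZE,
	    size -= PAGE_SIZE) {
		pte = pmap_l2_to_l3(pmap_pde(kernel_pmap, va, &lvl), va);
		/* OR the previous PTE into the accumulator while storing. */
		old_l3e |= pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
	}
	if ((old_l3e & ATTR_DESCR_VALID) != 0)
		pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
	else
		dsb(ishst);	/* old entries invalid, new ones not executable */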
git: 5ec2d94ade51 - main - vm_mmap_object: Update the spelling of true/false
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=5ec2d94ade51b2f2f129cf0c7f695582c7dccb81 commit 5ec2d94ade51b2f2f129cf0c7f695582c7dccb81 Author: Alan Cox AuthorDate: 2023-07-26 05:58:51 + Commit: Alan Cox CommitDate: 2023-07-27 05:25:53 + vm_mmap_object: Update the spelling of true/false Since fitit is already a bool, use true/false instead of TRUE/FALSE. MFC after: 2 weeks --- sys/vm/vm_mmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 408e077476dd..328fef007b1e 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -1577,12 +1577,12 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, return (EINVAL); if ((flags & MAP_FIXED) == 0) { - fitit = TRUE; + fitit = true; *addr = round_page(*addr); } else { if (*addr != trunc_page(*addr)) return (EINVAL); - fitit = FALSE; + fitit = false; } if (flags & MAP_ANON) {
git: 3d7c37425ee0 - main - amd64 pmap: Catch up with pctrie changes
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=3d7c37425ee07186c65d424306c1b295c30fa592 commit 3d7c37425ee07186c65d424306c1b295c30fa592 Author: Alan Cox AuthorDate: 2023-07-28 20:13:13 + Commit: Alan Cox CommitDate: 2023-07-28 20:13:13 + amd64 pmap: Catch up with pctrie changes Recent changes to the pctrie code make it necessary to initialize the kernel pmap's rangeset for PKU. --- sys/amd64/amd64/pmap.c | 4 1 file changed, 4 insertions(+) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index a4b8c6dc4c06..c1968fc11844 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1995,6 +1995,10 @@ pmap_bootstrap(vm_paddr_t *firstaddr) kernel_pmap->pm_stats.resident_count = res; vm_radix_init(&kernel_pmap->pm_root); kernel_pmap->pm_flags = pmap_flags; + if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) { + rangeset_init(&kernel_pmap->pm_pkru, pkru_dup_range, + pkru_free_range, kernel_pmap, M_NOWAIT); + } /* * The kernel pmap is always active on all CPUs. Once CPUs are
Re: git: 50d663b14b31 - main - vm: Fix vm_map_find_min()
I see. That change fixed the case where the address hint is non-zero, e.g., 0x10, but not zero.
On 7/30/23 05:58, Dmitry Chagin wrote:
On Sun, Jul 30, 2023 at 01:30:37PM +0300, Dmitry Chagin wrote:
On Wed, Jul 26, 2023 at 05:25:37AM +, Alan Cox wrote:
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=50d663b14b310d6020b4b6cc92d4fae985f086f2
commit 50d663b14b310d6020b4b6cc92d4fae985f086f2 Author: Alan Cox AuthorDate: 2023-07-25 07:24:19 + Commit: Alan Cox CommitDate: 2023-07-26 05:24:50 +
vm: Fix vm_map_find_min()
Fix the handling of address hints that are less than min_addr by vm_map_find_min().
Thank you for fixing that, however it still fails under Linuxulator.
#include #include #include #include #include #include #include #include
int main(int argc, char** argv)
{
	struct stat sb;
	void *s32;
	int f, r;

	f = open(argv[0], O_RDONLY);
	assert(f > 0);
	r = fstat(f, &sb);
	assert(r == 0);
	s32 = mmap(NULL, sb.st_size, PROT_READ, MAP_32BIT|MAP_PRIVATE, f, 0);
	assert(s32 != MAP_FAILED);
	assert((uintptr_t)s32 < 0x8000);
	close(f);
	munmap(s32, sb.st_size);
	return (0);
}
hmm, it also fails natively with disable aslr
Reported by:dchagin Reviewed by:kib Fixes: d8e6f4946cec0 "vm: Fix anonymous memory clustering under ASLR" Differential Revision: https://reviews.freebsd.org/D41159
--- sys/vm/vm_map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 444e09986d4e..eb607d519247 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -2255,10 +2255,10 @@ vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, int rv; hint = *addr; - if (hint == 0) + if (hint == 0) { cow |= MAP_NO_HINT; - if (hint < min_addr) *addr = hint = min_addr; + } for (;;) { rv = vm_map_find(map, object, offset, addr, length, max_addr, find_space, prot, max, cow);
git: 37e5d49e1e5e - main - vm: Fix address hints of 0 with MAP_32BIT
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=37e5d49e1e5e750bf2a200ef2e117d14c4e9a578 commit 37e5d49e1e5e750bf2a200ef2e117d14c4e9a578 Author: Alan Cox AuthorDate: 2023-08-03 07:07:14 + Commit: Alan Cox CommitDate: 2023-08-12 07:35:21 + vm: Fix address hints of 0 with MAP_32BIT Also, rename min_addr to default_addr, which better reflects what it represents. The min_addr is not a minimum address in the same way that max_addr is actually a maximum address that can be allocated. For example, a non-zero hint can be less than min_addr and be allocated. Reported by:dchagin Reviewed by:dchagin, kib, markj Fixes: d8e6f4946cec0 "vm: Fix anonymous memory clustering under ASLR" Differential Revision: https://reviews.freebsd.org/D41397 --- sys/vm/vm_map.c | 16 sys/vm/vm_mmap.c | 14 ++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 8d98af7709cd..c77c00b8b5c6 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -2255,19 +2255,19 @@ done: /* * vm_map_find_min() is a variant of vm_map_find() that takes an - * additional parameter (min_addr) and treats the given address - * (*addr) differently. Specifically, it treats *addr as a hint + * additional parameter ("default_addr") and treats the given address + * ("*addr") differently. Specifically, it treats "*addr" as a hint * and not as the minimum address where the mapping is created. * * This function works in two phases. First, it tries to * allocate above the hint. If that fails and the hint is - * greater than min_addr, it performs a second pass, replacing - * the hint with min_addr as the minimum address for the + * greater than "default_addr", it performs a second pass, replacing + * the hint with "default_addr" as the minimum address for the * allocation. 
*/ int vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, -vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr, +vm_offset_t *addr, vm_size_t length, vm_offset_t default_addr, vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, int cow) { @@ -2277,14 +2277,14 @@ vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, hint = *addr; if (hint == 0) { cow |= MAP_NO_HINT; - *addr = hint = min_addr; + *addr = hint = default_addr; } for (;;) { rv = vm_map_find(map, object, offset, addr, length, max_addr, find_space, prot, max, cow); - if (rv == KERN_SUCCESS || min_addr >= hint) + if (rv == KERN_SUCCESS || default_addr >= hint) return (rv); - *addr = hint = min_addr; + *addr = hint = default_addr; } } diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 7876a055ca91..d904c4f38e40 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -1555,7 +1555,7 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff, boolean_t writecounted, struct thread *td) { - vm_offset_t max_addr; + vm_offset_t default_addr, max_addr; int docow, error, findspace, rv; bool curmap, fitit; @@ -1630,10 +1630,16 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, max_addr = MAP_32BIT_MAX_ADDR; #endif if (curmap) { - rv = vm_map_find_min(map, object, foff, addr, size, + default_addr = round_page((vm_offset_t)td->td_proc->p_vmspace-> - vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr, - findspace, prot, maxprot, docow); + vm_daddr + lim_max(td, RLIMIT_DATA)); +#ifdef MAP_32BIT + if ((flags & MAP_32BIT) != 0) + default_addr = 0; +#endif + rv = vm_map_find_min(map, object, foff, addr, size, + default_addr, max_addr, findspace, prot, maxprot, + docow); } else { rv = vm_map_find(map, object, foff, addr, size, max_addr, findspace, prot, maxprot, docow);
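The effect for the failing test case: with MAP_32BIT and no hint, the search now defaults to the bottom of the address space instead of the region above the data segment limit, which can lie beyond MAP_32BIT_MAX_ADDR. The relevant lines of vm_mmap_object(), restated from the diff above:

	default_addr = round_page((vm_offset_t)td->td_proc->p_vmspace->
	    vm_daddr + lim_max(td, RLIMIT_DATA));
#ifdef MAP_32BIT
	if ((flags & MAP_32BIT) != 0)
		default_addr = 0;
#endif
	rv = vm_map_find_min(map, object, foff, addr, size, default_addr,
	    max_addr, findspace, prot, maxprot, docow);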
git: 902ed64fecbe - main - i386 pmap: Adapt recent amd64/arm64 superpage improvements
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=902ed64fecbe078e1cdd527b97af3958b413da11 commit 902ed64fecbe078e1cdd527b97af3958b413da11 Author: Alan Cox AuthorDate: 2023-09-24 18:21:36 + Commit: Alan Cox CommitDate: 2023-09-26 17:41:20 + i386 pmap: Adapt recent amd64/arm64 superpage improvements Don't recompute mpte during promotion. Optimize MADV_WILLNEED on existing superpages. Standardize promotion conditions across amd64, arm64, and i386. Stop requiring the accessed bit for superpage promotion. Tidy up pmap_promote_pde() calls. Retire PMAP_INLINE. It's no longer used. Note: Some of these changes are a prerequisite to fixing a panic that arises when attempting to create a wired superpage mapping by pmap_enter(psind=1) (as opposed to promotion). Reviewed by:kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D41944 --- sys/i386/i386/pmap.c| 200 sys/i386/include/pmap.h | 2 +- 2 files changed, 137 insertions(+), 65 deletions(-) diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 4198849b1a5a..2d19fc51dd53 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -159,16 +159,6 @@ #endif #include -#if !defined(DIAGNOSTIC) -#ifdef __GNUC_GNU_INLINE__ -#define PMAP_INLINE__attribute__((__gnu_inline__)) inline -#else -#define PMAP_INLINEextern inline -#endif -#else -#define PMAP_INLINE -#endif - #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else @@ -311,13 +301,14 @@ static intpmap_pvh_wired_mappings(struct md_page *pvh, int count); static voidpmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); -static boolpmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, +static int pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot); static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, vm_page_t m); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); -static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted); +static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted, +bool allpte_PG_A_set); static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); @@ -327,7 +318,8 @@ static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); #if VM_NRESERVLEVEL > 0 -static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); +static bool pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, +vm_page_t mpte); #endif static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); @@ -993,7 +985,7 @@ __CONCAT(PMTYPE, init)(void) */ if (pseflag != 0 && KERNBASE <= i << PDRSHIFT && i << PDRSHIFT < KERNend && - pmap_insert_pt_page(kernel_pmap, mpte, true)) + pmap_insert_pt_page(kernel_pmap, mpte, true, true)) panic("pmap_init: pmap_insert_pt_page failed"); } PMAP_UNLOCK(kernel_pmap); @@ -1928,14 +1920,26 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. * - * If "promoted" is false, then the page table page "mpte" must be zero filled. 
+ * If "promoted" is false, then the page table page "mpte" must be zero filled; + * "mpte"'s valid field will be set to 0. + * + * If "promoted" is true and "allpte_PG_A_set" is false, then "mpte" must + * contain valid mappings with identical attributes except for PG_A; "mpte"'s + * valid field will be set to 1. + * + * If "promoted" and "allpte_PG_A_set" are both true, then "mpte" must contain + * valid mappings with identical attributes including PG_A; "mpte"'s valid + * field will be set to VM_PAGE_BITS_ALL. */ static __inline int -pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted) +pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted, +bool allpte_PG_A_set) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0; +
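The hunk above is cut off right at the new assignment to the page's valid field. The fragment below sketches what the documented contract implies, patterned on the equivalent amd64 code rather than copied from this i386 commit, so the KASSERT text and the pm_root insertion are assumptions.

static __inline int
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
    bool allpte_PG_A_set)
{

        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
        KASSERT(promoted || !allpte_PG_A_set,
            ("a zero-filled PTP can't have PG_A set in every PTE"));
        /* Encode the three cases described in the comment above. */
        mpte->valid = promoted ? (allpte_PG_A_set ? VM_PAGE_BITS_ALL : 1) : 0;
        return (vm_radix_insert(&pmap->pm_root, mpte));
}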
git: 2001bef84ba6 - main - vm: Eliminate unnecessary lock asserts
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=2001bef84ba64cee51abf91b5ad3aca071e75788 commit 2001bef84ba64cee51abf91b5ad3aca071e75788 Author: Alan Cox AuthorDate: 2024-10-27 17:40:43 + Commit: Alan Cox CommitDate: 2024-10-27 19:03:52 + vm: Eliminate unnecessary lock asserts There is no actual need for the VM object to be locked when initializing a VM page iterator. Reviewed by:dougm Differential Revision: https://reviews.freebsd.org/D47298 --- sys/vm/vm_page.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 054832e3f19a..57e5684b3178 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -1715,7 +1715,6 @@ void vm_page_iter_init(struct pctrie_iter *pages, vm_object_t object) { - VM_OBJECT_ASSERT_LOCKED(object); vm_radix_iter_init(pages, &object->rtree); } @@ -1729,7 +1728,6 @@ vm_page_iter_limit_init(struct pctrie_iter *pages, vm_object_t object, vm_pindex_t limit) { - VM_OBJECT_ASSERT_LOCKED(object); vm_radix_iter_limit_init(pages, &object->rtree, limit); }
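One usage pattern this permits (assumed for illustration; the commit itself only removes the asserts) is setting up the iterator before the object lock is taken, since initialization merely records the object's radix trie. The lock is still required while pages are actually looked up or modified.

        struct pctrie_iter pages;

        /* No object lock needed here: this only latches &object->rtree. */
        vm_page_iter_init(&pages, object);

        VM_OBJECT_WLOCK(object);
        /* ... look up and operate on pages via the iterator ... */
        VM_OBJECT_WUNLOCK(object);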
git: fd630ae93634 - main - vm: Retire an unused declaration
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=fd630ae93634b3c7410a390c57408685caf8d937 commit fd630ae93634b3c7410a390c57408685caf8d937 Author: Alan Cox AuthorDate: 2024-11-24 19:23:48 + Commit: Alan Cox CommitDate: 2024-11-27 08:14:58 + vm: Retire an unused declaration The bio_transient_map was long ago replaced by a vmem arena. Reviewed by:kib, markj Differential Revision: https://reviews.freebsd.org/D47729 --- sys/sys/bio.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/sys/sys/bio.h b/sys/sys/bio.h index 1de841681710..74d2b03bd180 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -75,7 +75,6 @@ #ifdef _KERNEL struct disk; struct bio; -struct vm_map; typedef void bio_task_t(void *); @@ -144,7 +143,6 @@ struct bio_queue_head { int batched; }; -extern struct vm_map *bio_transient_map; extern int bio_transient_maxcnt; void biodone(struct bio *bp);
git: c296ac7e0f1c - main - vm: Optimize page rename
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=c296ac7e0f1c2fc9bc8bcab0177afb123ce6993a commit c296ac7e0f1c2fc9bc8bcab0177afb123ce6993a Author: Alan Cox AuthorDate: 2024-11-27 08:32:07 + Commit: Alan Cox CommitDate: 2024-11-30 08:59:15 + vm: Optimize page rename Rename vm_page_rename() to vm_page_iter_rename() to reflect its reimplementation using iterators, and pass the page to this function rather than spending clock cycles looking it up. Change its return value from 0/1 to a bool. Reviewed by:dougm, markj Differential Revision: https://reviews.freebsd.org/D47829 --- sys/vm/vm_object.c | 10 ++ sys/vm/vm_page.c | 28 sys/vm/vm_page.h | 3 ++- sys/vm/vm_reserv.c | 4 ++-- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 84981d7cc7cd..ff95469749b7 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1604,8 +1604,9 @@ retry: continue; } - /* vm_page_rename() will dirty the page. */ - if (vm_page_rename(&pages, new_object, m->pindex - offidxstart)) { + /* vm_page_iter_rename() will dirty the page. */ + if (!vm_page_iter_rename(&pages, m, new_object, m->pindex - + offidxstart)) { vm_page_xunbusy(m); VM_OBJECT_WUNLOCK(new_object); VM_OBJECT_WUNLOCK(orig_object); @@ -1789,9 +1790,10 @@ vm_object_collapse_scan(vm_object_t object) * backing object to the main object. * * If the page was mapped to a process, it can remain mapped -* through the rename. vm_page_rename() will dirty the page. +* through the rename. vm_page_iter_rename() will dirty the +* page. */ - if (vm_page_rename(&pages, object, new_pindex)) { + if (!vm_page_iter_rename(&pages, p, object, new_pindex)) { vm_page_xunbusy(p); next = vm_object_collapse_scan_wait(&pages, object, NULL); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index a37619c7743e..8a23f900e987 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -2065,10 +2065,14 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, } /* - * vm_page_rename: + * vm_page_iter_rename: * - * Move the current page, as identified by iterator, from its current - * object to the specified target object/offset. + * Tries to move the specified page from its current object to a new object + * and pindex, using the given iterator to remove the page from its current + * object. Returns true if the move was successful, and false if the move + * was aborted due to a failed memory allocation. + * + * Panics if a page already resides in the new object at the new pindex. * * Note: swap associated with the page must be invalidated by the move. We * have to do this for several reasons: (1) we aren't freeing the @@ -2082,18 +2086,18 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, * * The objects must be locked. 
*/ -int -vm_page_rename(struct pctrie_iter *pages, +bool +vm_page_iter_rename(struct pctrie_iter *old_pages, vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) { - vm_page_t m, mpred; + vm_page_t mpred; vm_pindex_t opidx; + KASSERT((m->ref_count & VPRC_OBJREF) != 0, + ("%s: page %p is missing object ref", __func__, m)); + VM_OBJECT_ASSERT_WLOCKED(m->object); VM_OBJECT_ASSERT_WLOCKED(new_object); - m = vm_radix_iter_page(pages); - KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m)); - /* * Create a custom version of vm_page_insert() which does not depend * by m_prev and can cheat on the implementation aspects of the @@ -2103,7 +2107,7 @@ vm_page_rename(struct pctrie_iter *pages, m->pindex = new_pindex; if (vm_radix_insert_lookup_lt(&new_object->rtree, m, &mpred) != 0) { m->pindex = opidx; - return (1); + return (false); } /* @@ -2111,7 +2115,7 @@ vm_page_rename(struct pctrie_iter *pages, * the listq iterator is tainted. */ m->pindex = opidx; - vm_radix_iter_remove(pages); + vm_radix_iter_remove(old_pages); vm_page_remove_radixdone(m); /* Return back to the new pindex to complete vm_page_insert(). */ @@ -2121,7 +2125,7 @@ vm_page_rename(struct pctrie_iter *pages, vm_page_insert_radixdone(m, new_object, mpred);
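Condensed from the vm_object.c hunks above, the new calling convention looks roughly like the following; "new_pindex" stands for whatever target index the caller computes (for example, m->pindex - offidxstart), and the back-out path varies by caller.

        /* Both object locks are held and m is exclusively busied. */
        if (!vm_page_iter_rename(&pages, m, new_object, new_pindex)) {
                /* Radix insertion into new_object failed for lack of memory. */
                vm_page_xunbusy(m);
                /* ... unlock, wait for memory, and retry ... */
        } else {
                /* m now resides in new_object at new_pindex and is dirty. */
        }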
git: 8c8d36b9d172 - main - vm: static-ize vm_page_alloc_after()
The branch main has been updated by alc: URL: https://cgit.FreeBSD.org/src/commit/?id=8c8d36b9d17239dc4e54731b6cf54c9f9fce43a9 commit 8c8d36b9d17239dc4e54731b6cf54c9f9fce43a9 Author: Alan Cox AuthorDate: 2024-11-16 22:20:14 + Commit: Alan Cox CommitDate: 2024-11-17 18:19:00 + vm: static-ize vm_page_alloc_after() This function is only intended for the internal use of the VM system. Reviewed by:dougm, kib, markj Differential Revision: https://reviews.freebsd.org/D47644 --- sys/vm/vm_page.c | 4 +++- sys/vm/vm_page.h | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 6b49f0745c73..0b9b55337b52 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -162,6 +162,8 @@ SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | static uma_zone_t fakepg_zone; +static vm_page_t vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, +int req, vm_page_t mpred); static void vm_page_alloc_check(vm_page_t m); static vm_page_t vm_page_alloc_nofree_domain(int domain, int req); static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, @@ -2085,7 +2087,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) * the resident page in the object with largest index smaller than the given * page index, or NULL if no such page exists. */ -vm_page_t +static vm_page_t vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req, vm_page_t mpred) { diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index b85342b784de..893608bcacf1 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -608,7 +608,6 @@ void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); vm_page_t vm_page_mpred(vm_object_t, vm_pindex_t); vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int); -vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t); vm_page_t vm_page_alloc_domain_after(vm_object_t, vm_pindex_t, int, int, vm_page_t); vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
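With vm_page_alloc_after() now private to vm_page.c, code outside the VM system keeps using the allocators that remain declared in vm_page.h. A minimal sketch, with the allocation flags and error handling chosen only for illustration:

        vm_page_t m;

        VM_OBJECT_WLOCK(object);
        /*
         * vm_page_alloc() looks up the predecessor page internally (see
         * vm_page_mpred()), so callers need not supply the mpred argument
         * that vm_page_alloc_after() took.
         */
        m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
        VM_OBJECT_WUNLOCK(object);
        if (m == NULL) {
                /* ... wait for free pages and retry, or fail the request ... */
        }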