git: 5ee5c40402c9 - main - arm64 pmap: Defer bti lookup

2024-06-08 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5ee5c40402c92a498ed8d6eeb6cf0b5c1680817b

commit 5ee5c40402c92a498ed8d6eeb6cf0b5c1680817b
Author: Alan Cox 
AuthorDate: 2024-06-07 05:23:59 +
Commit: Alan Cox 
CommitDate: 2024-06-08 07:26:55 +

arm64 pmap: Defer bti lookup

Defer the bti lookup until after page table page allocation is complete.
We sometimes release the pmap lock and sleep during page table page
allocation.  Consequently, the result of a bti lookup from before
page table page allocation could be stale when we finally create the
mapping based on it.

Modify pmap_bti_same() to update the prototype PTE at the same time as
checking the address range.  This eliminates the need for calling
pmap_pte_bti() in addition to pmap_bti_same().  pmap_bti_same() was
already doing most of the work of pmap_pte_bti().

Reviewed by:  markj
Differential Revision:  https://reviews.freebsd.org/D45502
---
 sys/arm64/arm64/pmap.c | 73 ++
 1 file changed, 44 insertions(+), 29 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 92c1c824ba4e..7b30b2a6ae37 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -508,7 +508,8 @@ static void pmap_update_entry(pmap_t pmap, pd_entry_t *pte, 
pd_entry_t newpte,
 static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
 
 static uma_zone_t pmap_bti_ranges_zone;
-static bool pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+static bool pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+pt_entry_t *pte);
 static pt_entry_t pmap_pte_bti(pmap_t pmap, vm_offset_t va);
 static void pmap_bti_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
 static void *bti_dup_range(void *ctx, void *data);
@@ -4955,21 +4956,22 @@ set_l3:
 #endif /* VM_NRESERVLEVEL > 0 */
 
 static int
-pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags,
+pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t pte, int flags,
 int psind)
 {
-   pd_entry_t *l0p, *l1p, *l2p, origpte;
+   pd_entry_t *l0p, *l1p, *l2p, newpte, origpte;
vm_page_t mp;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(psind > 0 && psind < MAXPAGESIZES,
("psind %d unexpected", psind));
-   KASSERT((PTE_TO_PHYS(newpte) & (pagesizes[psind] - 1)) == 0,
-   ("unaligned phys address %#lx newpte %#lx psind %d",
-   PTE_TO_PHYS(newpte), newpte, psind));
+   KASSERT((PTE_TO_PHYS(pte) & (pagesizes[psind] - 1)) == 0,
+   ("unaligned phys address %#lx pte %#lx psind %d",
+   PTE_TO_PHYS(pte), pte, psind));
 
 restart:
-   if (!pmap_bti_same(pmap, va, va + pagesizes[psind]))
+   newpte = pte;
+   if (!pmap_bti_same(pmap, va, va + pagesizes[psind], &newpte))
return (KERN_PROTECTION_FAILURE);
if (psind == 2) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
@@ -5123,9 +5125,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, 
vm_prot_t prot,
 
lock = NULL;
PMAP_LOCK(pmap);
-   /* Wait until we lock the pmap to protect the bti rangeset */
-   new_l3 |= pmap_pte_bti(pmap, va);
-
if ((flags & PMAP_ENTER_LARGEPAGE) != 0) {
KASSERT((m->oflags & VPO_UNMANAGED) != 0,
("managed largepage va %#lx flags %#x", va, flags));
@@ -5197,6 +5196,7 @@ havel3:
orig_l3 = pmap_load(l3);
opa = PTE_TO_PHYS(orig_l3);
pv = NULL;
+   new_l3 |= pmap_pte_bti(pmap, va);
 
/*
 * Is the specified virtual address already mapped?
@@ -5405,7 +5405,6 @@ pmap_enter_l2_rx(pmap_t pmap, vm_offset_t va, vm_page_t 
m, vm_prot_t prot,
new_l2 = (pd_entry_t)(VM_PAGE_TO_PTE(m) | ATTR_DEFAULT |
ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) |
L2_BLOCK);
-   new_l2 |= pmap_pte_bti(pmap, va);
if ((m->oflags & VPO_UNMANAGED) == 0) {
new_l2 |= ATTR_SW_MANAGED;
new_l2 &= ~ATTR_AF;
@@ -5478,7 +5477,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
new_l2, u_int flags,
 * and let vm_fault() cope.  Check after l2 allocation, since
 * it could sleep.
 */
-   if (!pmap_bti_same(pmap, va, va + L2_SIZE)) {
+   if (!pmap_bti_same(pmap, va, va + L2_SIZE, &new_l2)) {
KASSERT(l2pg != NULL, ("pmap_enter_l2: missing L2 PTP"));
pmap_abort_ptp(pmap, va, l2pg);
return (KERN_PROTECTION_FAILURE);
@@ -5633,7 +5632,6 @@ pmap_enter_l3c_rx(pmap_t pmap, vm_offset_t va, vm_page_t 
m, vm_page_t *ml3p,
l3e = VM_PAGE_TO_PTE(m) | ATTR_DEFAULT |
ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S

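As a rough illustration of the ordering issue described in the commit message above, here is a minimal userspace sketch (hypothetical names and attribute bits, not the actual pmap code): the attribute lookup is performed, and folded into the prototype PTE, only after the allocation that may have slept.

/*
 * Minimal userspace sketch of the ordering fixed by this commit
 * (hypothetical names; not the kernel code).  A lookup done before an
 * allocation that can drop the lock and sleep may be stale by the time
 * the mapping is created, so the lookup is deferred until afterwards
 * and its result is folded directly into the prototype PTE.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ATTR_GP	(1u << 5)		/* stand-in for ATTR_S1_GP */

static unsigned bti_attr;		/* may change while we "sleep" */

/* Stand-in for a page table page allocation that can drop the lock. */
static void
alloc_ptp_may_sleep(void)
{
	bti_attr = ATTR_GP;		/* another thread updated the rangeset */
}

/*
 * Combined range check and prototype-PTE update, in the spirit of the
 * reworked pmap_bti_same().
 */
static bool
bti_same(uint64_t *pte)
{
	*pte |= bti_attr;
	return (true);
}

int
main(void)
{
	uint64_t pte = 0x700;		/* prototype PTE, GP bit not yet known */

	alloc_ptp_may_sleep();		/* allocation first ... */
	if (bti_same(&pte))		/* ... lookup afterwards, never stale */
		printf("final pte %#lx (GP %s)\n", (unsigned long)pte,
		    (pte & ATTR_GP) != 0 ? "set" : "clear");
	return (0);
}
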
git: 268f19aacc6a - main - vm: Reduce address space fragmentation

2024-06-13 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=268f19aacc6af8f64c438e8515213023a2e66ed7

commit 268f19aacc6af8f64c438e8515213023a2e66ed7
Author: Alan Cox 
AuthorDate: 2024-06-09 16:58:27 +
Commit: Alan Cox 
CommitDate: 2024-06-13 20:13:45 +

vm: Reduce address space fragmentation

jemalloc performs two types of virtual memory allocations: (1) large
chunks of virtual memory, where the chunk size is a multiple of a
superpage and explicitly aligned, and (2) small allocations, mostly
128KB, where no alignment is requested.  Typically, it starts with a
small allocation, and over time it makes both types of allocation.

With anon_loc being updated on every allocation, we wind up with a
repeating pattern of a small allocation, a large gap, and a large,
aligned allocation.  (As an aside, we wind up allocating a reservation
for these small allocations, but it will never fill because the next
large, aligned allocation updates anon_loc, leaving a gap that will
never be filled with other small allocations.)

With this change, anon_loc isn't updated on every allocation.  So, the
small allocations will be clustered together, the large allocations will
be clustered together, and there will be fewer gaps between the
anonymous memory allocations.  In addition, I see a small reduction in
reservations allocated (e.g., 1.6% during buildworld), fewer partially
populated reservations, and a small increase in 64KB page promotions on
arm64.

Reviewed by:  kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D39845
---
 sys/vm/vm_map.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 3c7afcb6642f..fa71bb8a01d6 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2247,8 +2247,15 @@ again:
rv = vm_map_insert(map, object, offset, *addr, *addr + length,
prot, max, cow);
}
-   if (rv == KERN_SUCCESS && update_anon)
-   map->anon_loc = *addr + length;
+
+   /*
+* Update the starting address for clustered anonymous memory mappings
+* if a starting address was not previously defined or an ASLR restart
+* placed an anonymous memory mapping at a lower address.
+*/
+   if (update_anon && rv == KERN_SUCCESS && (map->anon_loc == 0 ||
+   *addr < map->anon_loc))
+   map->anon_loc = *addr;
 done:
vm_map_unlock(map);
return (rv);
@@ -4041,9 +4048,6 @@ vm_map_delete(vm_map_t map, vm_offset_t start, 
vm_offset_t end)
entry->object.vm_object != NULL)
pmap_map_delete(map->pmap, entry->start, entry->end);
 
-   if (entry->end == map->anon_loc)
-   map->anon_loc = entry->start;
-
/*
 * Delete the entry only after removing all pmap
 * entries pointing to its pages.  (Otherwise, its

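The placement policy change can be summarized with a small userspace sketch (hypothetical, simplified types; not the real vm_map code): anon_loc is now only initialized or pulled down to a lower address, instead of chasing the end of every anonymous mapping.

/*
 * Userspace sketch of the anon_loc update policy (hypothetical,
 * simplified; not the actual vm_map code).  Old: anon_loc chased the
 * end of every anonymous mapping, so each large aligned mapping left
 * a gap behind it that was never reused.  New: anon_loc is only set
 * when unset, or pulled down, so later small mappings cluster into
 * earlier gaps.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t vm_offset_t;

struct map { vm_offset_t anon_loc; };

/* Old policy: always advance past the new mapping. */
static void
update_old(struct map *m, vm_offset_t addr, vm_offset_t len)
{
	m->anon_loc = addr + len;
}

/* New policy: only set when unset, or when ASLR placed us lower. */
static void
update_new(struct map *m, vm_offset_t addr)
{
	if (m->anon_loc == 0 || addr < m->anon_loc)
		m->anon_loc = addr;
}

int
main(void)
{
	struct map a = { 0 }, b = { 0 };

	update_old(&a, 0x200000, 0x20000);	/* small 128KB mapping */
	update_old(&a, 0x400000, 0x200000);	/* large, aligned mapping */
	update_new(&b, 0x200000);
	update_new(&b, 0x400000);
	printf("old policy anon_loc %#lx, new policy anon_loc %#lx\n",
	    (unsigned long)a.anon_loc, (unsigned long)b.anon_loc);
	return (0);
}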


git: 383fd3ea0012 - main - arm64: Handle an unaligned start in pmap_mask_set_locked()

2024-07-04 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=383fd3ea00128cf65fbea0e4cbdb9849945c854b

commit 383fd3ea00128cf65fbea0e4cbdb9849945c854b
Author: Alan Cox 
AuthorDate: 2024-07-03 05:15:35 +
Commit: Alan Cox 
CommitDate: 2024-07-05 05:42:52 +

arm64: Handle an unaligned start in pmap_mask_set_locked()

In pmap_mask_set_locked(), correctly handle a starting address that is
in the middle of an L3C page.  The symptoms arising from this error
included assertion failures in pmap_demote_l3c().

Reported by:  andrew
Reviewed by:  markj
Fixes:  fd6cb031f577 "arm64 pmap: Add ATTR_CONTIGUOUS support [Part 1]"
Differential Revision:  https://reviews.freebsd.org/D45851
---
 sys/arm64/arm64/pmap.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index f4a46823428a..a9cb8c7fe468 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -4403,8 +4403,22 @@ pmap_mask_set_locked(pmap_t pmap, vm_offset_t sva, 
vm_offset_t eva, pt_entry_t m
va = va_next;
}
if ((l3 & ATTR_CONTIGUOUS) != 0) {
-   l3p += L3C_ENTRIES - 1;
-   sva += L3C_SIZE - L3_SIZE;
+   /*
+* Does this L3C page extend beyond
+* the requested range?  Handle the
+* possibility that "va_next" is zero.
+*/
+   if ((sva | L3C_OFFSET) > va_next - 1)
+   break;
+
+   /*
+* Skip ahead to the last L3_PAGE
+* within this L3C page.
+*/
+   l3p = (pt_entry_t *)((uintptr_t)l3p |
+   ((L3C_ENTRIES - 1) *
+   sizeof(pt_entry_t)));
+   sva |= L3C_SIZE - L3_SIZE;
}
continue;
}

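The key trick in the hunk above is switching from addition to OR, so that a start address in the middle of an L3C page still lands on the last base page of that same L3C page; a small sketch of the arithmetic, assuming the 4KB/64KB configuration (illustrative only):

/*
 * Userspace sketch of the skip-ahead arithmetic (assumes a 4KB base
 * page and a 64KB L3C page; not the actual pmap code).  ORing in
 * L3C_SIZE - L3_SIZE advances to the last base page of the current
 * L3C page even when "sva" starts in the middle of it, whereas adding
 * L3C_SIZE - L3_SIZE (the old code) would overshoot into the next
 * L3C page.
 */
#include <stdint.h>
#include <stdio.h>

#define L3_SIZE		0x1000UL		/* 4KB base page */
#define L3C_ENTRIES	16
#define L3C_SIZE	(L3C_ENTRIES * L3_SIZE)	/* 64KB */
#define L3C_OFFSET	(L3C_SIZE - 1)

int
main(void)
{
	uint64_t sva = 0x10005000;	/* middle of a 64KB L3C page */
	uint64_t va_add = sva + (L3C_SIZE - L3_SIZE);	/* old: overshoots */
	uint64_t va_or = sva | (L3C_SIZE - L3_SIZE);	/* new: stays inside */

	printf("L3C page start %#lx\n", (unsigned long)(sva & ~L3C_OFFSET));
	printf("old skip -> %#lx (already in the next L3C page)\n",
	    (unsigned long)va_add);
	printf("new skip -> %#lx (last 4KB page of this L3C page)\n",
	    (unsigned long)va_or);
	return (0);
}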


git: fb32ba6aa44d - main - amd64/arm64: Eliminate unnecessary demotions in pmap_protect()

2024-07-06 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=fb32ba6aa44dc86e70ad06b44f93a9709e78f3d1

commit fb32ba6aa44dc86e70ad06b44f93a9709e78f3d1
Author: Alan Cox 
AuthorDate: 2024-07-05 18:20:01 +
Commit: Alan Cox 
CommitDate: 2024-07-06 20:48:10 +

amd64/arm64: Eliminate unnecessary demotions in pmap_protect()

In pmap_protect(), when the mapping isn't changing, we don't need to
perform a superpage demotion, even though the requested change doesn't
cover the entire superpage.

Reviewed by:  kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D45886
---
 sys/amd64/amd64/pmap.c | 21 +
 sys/arm64/arm64/pmap.c |  3 ++-
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 57943e815b5b..2bcf671be243 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6796,8 +6796,7 @@ retry_pdpe:
 */
if ((ptpaddr & PG_PS) != 0) {
/*
-* Are we protecting the entire large page?  If not,
-* demote the mapping and fall through.
+* Are we protecting the entire large page?
 */
if (sva + NBPDR == va_next && eva >= va_next) {
/*
@@ -6807,9 +6806,23 @@ retry_pdpe:
if (pmap_protect_pde(pmap, pde, sva, prot))
anychanged = true;
continue;
-   } else if (!pmap_demote_pde(pmap, pde, sva)) {
+   }
+
+   /*
+* Does the large page mapping need to change?  If so,
+* demote it and fall through.
+*/
+   pbits = ptpaddr;
+   if ((prot & VM_PROT_WRITE) == 0)
+   pbits &= ~(PG_RW | PG_M);
+   if ((prot & VM_PROT_EXECUTE) == 0)
+   pbits |= pg_nx;
+   if (ptpaddr == pbits || !pmap_demote_pde(pmap, pde,
+   sva)) {
/*
-* The large page mapping was destroyed.
+* Either the large page mapping doesn't need
+* to change, or it was destroyed during
+* demotion.
 */
continue;
}
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index a9cb8c7fe468..29552f722aa4 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -4373,7 +4373,8 @@ pmap_mask_set_locked(pmap_t pmap, vm_offset_t sva, 
vm_offset_t eva, pt_entry_t m
if (sva + L2_SIZE == va_next && eva >= va_next) {
pmap_protect_l2(pmap, l2, sva, mask, nbits);
continue;
-   } else if (pmap_demote_l2(pmap, l2, sva) == NULL)
+   } else if ((pmap_load(l2) & mask) == nbits ||
+   pmap_demote_l2(pmap, l2, sva) == NULL)
continue;
}
KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,

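The amd64 hunk above computes what the protected superpage entry would look like and skips the demotion when nothing would change; a minimal sketch of that test (simplified PTE bits, not the full amd64 layout):

/*
 * Userspace sketch of the "does the large page mapping need to
 * change?" test added to pmap_protect() (simplified; the bit
 * positions are illustrative rather than the exact amd64 PTE layout).
 * If masking off write/execute rights leaves the entry unchanged,
 * demotion is skipped.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PG_RW	(1ULL << 1)
#define PG_M	(1ULL << 6)
#define PG_NX	(1ULL << 63)

#define VM_PROT_WRITE	0x2
#define VM_PROT_EXECUTE	0x4

static bool
needs_change(uint64_t ptpaddr, int prot)
{
	uint64_t pbits = ptpaddr;

	if ((prot & VM_PROT_WRITE) == 0)
		pbits &= ~(PG_RW | PG_M);
	if ((prot & VM_PROT_EXECUTE) == 0)
		pbits |= PG_NX;
	return (ptpaddr != pbits);
}

int
main(void)
{
	uint64_t ro_pde = PG_NX;	/* already read-only, no-execute */

	/* mprotect(PROT_READ) on an already read-only mapping: keep it. */
	printf("demote? %s\n", needs_change(ro_pde, 0) ? "yes" : "no");
	/* The same request on a writable mapping: demote and fall through. */
	printf("demote? %s\n", needs_change(ro_pde | PG_RW, 0) ? "yes" : "no");
	return (0);
}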


git: 3e00c11a4f43 - main - arm64: Support the L3 ATTR_CONTIGUOUS page size in pagesizes[]

2024-07-13 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3e00c11a4f43bf1c7b88d25638e2bfee399e7674

commit 3e00c11a4f43bf1c7b88d25638e2bfee399e7674
Author: Alan Cox 
AuthorDate: 2024-07-12 07:44:56 +
Commit: Alan Cox 
CommitDate: 2024-07-13 17:43:42 +

arm64: Support the L3 ATTR_CONTIGUOUS page size in pagesizes[]

Update pagesizes[] to include the L3 ATTR_CONTIGUOUS (L3C) page size,
which is 64KB when the base page size is 4KB and 2MB when the base page
size is 16KB.

Add support for L3C pages to shm_create_largepage().

Add support for creating L3C page mappings to pmap_enter(psind=1).

Add support for reporting L3C page mappings to mincore(2) and
procstat(8).

Update vm_fault_soft_fast() and vm_fault_populate() to handle multiple
superpage sizes.

Declare arm64 as supporting two superpage reservation sizes, and
simulate two superpage reservation sizes, updating the vm_page's psind
field to reflect the correct page size from pagesizes[].  (The next
patch in this series will replace this simulation.  This patch is
already big enough.)

Co-authored-by: Eliot Solomon 
Reviewed by:  kib
Differential Revision:  https://reviews.freebsd.org/D45766
---
 share/man/man7/arch.7   |   2 +-
 sys/arm64/arm64/pmap.c  | 162 +---
 sys/arm64/include/param.h   |   2 +-
 sys/arm64/include/vmparam.h |  25 ---
 sys/kern/imgact_elf.c   |   8 ++-
 sys/kern/kern_mib.c |   8 ++-
 sys/kern/kern_proc.c|  12 +++-
 sys/kern/uipc_shm.c |  15 +++-
 sys/sys/mman.h  |   4 +-
 sys/vm/vm_domainset.c   |   3 +
 sys/vm/vm_fault.c   |  32 ++---
 sys/vm/vm_glue.c|   5 +-
 sys/vm/vm_kern.c|   5 +-
 sys/vm/vm_map.c |  46 ++---
 sys/vm/vm_page.c|   6 +-
 sys/vm/vm_page.h|   2 +-
 sys/vm/vm_reserv.c  | 104 
 17 files changed, 344 insertions(+), 97 deletions(-)

diff --git a/share/man/man7/arch.7 b/share/man/man7/arch.7
index f3d2e1036706..88228b807e6a 100644
--- a/share/man/man7/arch.7
+++ b/share/man/man7/arch.7
@@ -218,7 +218,7 @@ is 8 bytes on all supported architectures except i386.
 .Ss Page Size
 .Bl -column -offset indent "Architecture" "Page Sizes"
 .It Sy Architecture Ta Sy Page Sizes
-.It aarch64 Ta 4K, 2M, 1G
+.It aarch64 Ta 4K, 64K, 2M, 1G
 .It amd64   Ta 4K, 2M, 1G
 .It armv7   Ta 4K, 1M
 .It i386Ta 4K, 2M (PAE), 4M
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index c3357900e1be..2540b5eaf4b9 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1631,11 +1631,14 @@ pmap_init(void)
if (superpages_enabled) {
KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
("pmap_init: can't assign to pagesizes[1]"));
-   pagesizes[1] = L2_SIZE;
+   pagesizes[1] = L3C_SIZE;
+   KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
+   ("pmap_init: can't assign to pagesizes[2]"));
+   pagesizes[2] = L2_SIZE;
if (L1_BLOCKS_SUPPORTED) {
-   KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
-   ("pmap_init: can't assign to pagesizes[2]"));
-   pagesizes[2] = L1_SIZE;
+   KASSERT(MAXPAGESIZES > 3 && pagesizes[3] == 0,
+   ("pmap_init: can't assign to pagesizes[3]"));
+   pagesizes[3] = L1_SIZE;
}
}
 
@@ -4959,7 +4962,7 @@ static int
 pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t pte, int flags,
 int psind)
 {
-   pd_entry_t *l0p, *l1p, *l2p, newpte, origpte;
+   pd_entry_t *l0p, *l1p, *l2p, *l3p, newpte, origpte, *tl3p;
vm_page_t mp;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -4973,9 +4976,11 @@ restart:
newpte = pte;
if (!pmap_bti_same(pmap, va, va + pagesizes[psind], &newpte))
return (KERN_PROTECTION_FAILURE);
-   if (psind == 2) {
+   if (psind == 3) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
 
+   KASSERT(pagesizes[psind] == L1_SIZE,
+   ("pagesizes[%d] != L1_SIZE", psind));
l0p = pmap_l0(pmap, va);
if ((pmap_load(l0p) & ATTR_DESCR_VALID) == 0) {
mp = _pmap_alloc_l3(pmap, pmap_l0_pindex(va), NULL);
@@ -5005,7 +5010,9 @@ restart:
("va %#lx changing 1G phys page l1 %#lx newpte %#lx",
va, origpte, newpte));
pmap_store(l1p, newpte);
-   } else /* (psind == 1) */ {
+   } else if (psind == 2) {
+

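A small sketch of the L3C size arithmetic behind the new pagesizes[] entry (illustrative only; the names mirror the pmap ones):

/*
 * Userspace sketch of the L3C size arithmetic described above
 * (illustrative only).  The number of ATTR_CONTIGUOUS entries at
 * level 3 is 16 for a 4KB base page and 128 for a 16KB base page,
 * giving 64KB and 2MB L3C pages respectively.
 */
#include <stdio.h>

static unsigned long
l3c_size(unsigned long page_size)
{
	unsigned long l3c_entries = (page_size == 4096) ? 16 : 128;

	return (l3c_entries * page_size);
}

int
main(void)
{
	printf("4KB base page  -> L3C size %lu KB\n", l3c_size(4096) >> 10);
	printf("16KB base page -> L3C size %lu MB\n", l3c_size(16384) >> 20);
	return (0);
}
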
git: 772ae9eddf87 - main - vm ASLR: Handle VM_NRESERV_LEVEL == 0

2024-07-13 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=772ae9eddf87d835279ea6aaaf08b8ca421101ff

commit 772ae9eddf87d835279ea6aaaf08b8ca421101ff
Author: Alan Cox 
AuthorDate: 2024-07-13 19:40:43 +
Commit: Alan Cox 
CommitDate: 2024-07-13 19:40:43 +

vm ASLR: Handle VM_NRESERV_LEVEL == 0

Some flavors of powerpc don't enable superpage reservations.

Fixes: 3e00c11a4f43 ("arm64: Support the L3 ATTR_CONTIGUOUS page ...")
---
 sys/vm/vm_map.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index b9c27e14d1d0..77297a0e3957 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1993,7 +1993,7 @@ out:
return (result);
 }
 
-#if VM_NRESERVLEVEL == 1
+#if VM_NRESERVLEVEL <= 1
 static const int aslr_pages_rnd_64[2] = {0x1000, 0x10};
 static const int aslr_pages_rnd_32[2] = {0x100, 0x4};
 #elif VM_NRESERVLEVEL == 2



git: cd836f600418 - main - vm: Retire kmem_arena

2024-07-24 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=cd836f600418e892869d23cee857ce1a6cd5b863

commit cd836f600418e892869d23cee857ce1a6cd5b863
Author: Alan Cox 
AuthorDate: 2024-07-21 19:50:47 +
Commit: Alan Cox 
CommitDate: 2024-07-24 19:31:40 +

vm: Retire kmem_arena

It has simply been an alias for the kernel_arena for many years now.
Enough time has passed to retire it.  Any out-of-tree kernel modules
that directly use kmem_arena should switch to kernel_arena.

Reviewed by:  kib, markj
Differential Revision:  https://reviews.freebsd.org/D46057
---
 sys/arm/nvidia/drm2/tegra_bo.c | 4 ++--
 sys/kern/subr_vmem.c   | 2 --
 sys/vm/vm_kern.h   | 1 -
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/sys/arm/nvidia/drm2/tegra_bo.c b/sys/arm/nvidia/drm2/tegra_bo.c
index c27b9f39c508..08cd3de6a3fe 100644
--- a/sys/arm/nvidia/drm2/tegra_bo.c
+++ b/sys/arm/nvidia/drm2/tegra_bo.c
@@ -71,7 +71,7 @@ tegra_bo_destruct(struct tegra_bo *bo)
 
vm_object_deallocate(bo->cdev_pager);
if (bo->vbase != 0)
-   vmem_free(kmem_arena, bo->vbase, size);
+   vmem_free(kernel_arena, bo->vbase, size);
 }
 
 static void
@@ -137,7 +137,7 @@ tegra_bo_init_pager(struct tegra_bo *bo)
size = round_page(bo->gem_obj.size);
 
bo->pbase = VM_PAGE_TO_PHYS(bo->m[0]);
-   if (vmem_alloc(kmem_arena, size, M_WAITOK | M_BESTFIT, &bo->vbase))
+   if (vmem_alloc(kernel_arena, size, M_WAITOK | M_BESTFIT, &bo->vbase))
return (ENOMEM);
 
VM_OBJECT_WLOCK(bo->cdev_pager);
diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c
index a706d944dc3f..9288b0935441 100644
--- a/sys/kern/subr_vmem.c
+++ b/sys/kern/subr_vmem.c
@@ -236,9 +236,7 @@ static uma_zone_t vmem_bt_zone;
 static struct vmem kernel_arena_storage;
 static struct vmem buffer_arena_storage;
 static struct vmem transient_arena_storage;
-/* kernel and kmem arenas are aliased for backwards KPI compat. */
 vmem_t *kernel_arena = &kernel_arena_storage;
-vmem_t *kmem_arena = &kernel_arena_storage;
 vmem_t *buffer_arena = &buffer_arena_storage;
 vmem_t *transient_arena = &transient_arena_storage;
 
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
index 848f28fe90b8..942c03480364 100644
--- a/sys/vm/vm_kern.h
+++ b/sys/vm/vm_kern.h
@@ -69,7 +69,6 @@ extern struct vm_map exec_map_store;
 extern struct vm_map pipe_map_store;
 #definepipe_map(&pipe_map_store)
 extern struct vmem *kernel_arena;
-extern struct vmem *kmem_arena;
 extern struct vmem *buffer_arena;
 extern struct vmem *transient_arena;
 extern struct vmem *memguard_arena;



git: 096dfa338d73 - main - vm: Retire vm_page_alloc_freelist{,_domain}()

2024-07-24 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=096dfa338d7391cc957dba9cca44ceb7f78cb891

commit 096dfa338d7391cc957dba9cca44ceb7f78cb891
Author: Alan Cox 
AuthorDate: 2024-07-23 07:14:31 +
Commit: Alan Cox 
CommitDate: 2024-07-24 19:31:40 +

vm: Retire vm_page_alloc_freelist{,_domain}()

Once upon a time, I created vm_page_alloc_freelist{,_domain}() to
support faster allocation of pages that were mapped by the partial
direct map on 32-bit MIPS.  At the time, I expected that these
functions might find other uses too, but those other uses never
materialized.  So, these functions have not been used for some time
now.  Instead, people use the more general vm_page_alloc_contig().

Reviewed by:  kib, markj
Differential Revision:  https://reviews.freebsd.org/D46063
---
 ObsoleteFiles.inc  |  4 
 share/man/man9/Makefile|  2 --
 share/man/man9/vm_page_alloc.9 | 26 +
 sys/vm/vm_page.c   | 51 +-
 sys/vm/vm_page.h   |  2 --
 5 files changed, 11 insertions(+), 74 deletions(-)

diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc
index f8f0309d6ccf..02a34a2541eb 100644
--- a/ObsoleteFiles.inc
+++ b/ObsoleteFiles.inc
@@ -51,6 +51,10 @@
 #   xargs -n1 | sort | uniq -d;
 # done
 
+# 20240721: retire vm_page_alloc_freelist
+OLD_FILES+=usr/share/man/man9/vm_page_alloc_freelist.9.gz
+OLD_FILES+=usr/share/man/man9/vm_page_alloc_freelist_domain.9.gz
+
 # 20240716: retire mergemaster
 OLD_FILES+=usr/sbin/mergemaster
 OLD_FILES+=usr/share/man/man8/mergemaster.8.gz
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index 9880b7b2f5e4..f7c21ab541b6 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -2384,8 +2384,6 @@ MLINKS+=vm_page_alloc.9 vm_page_alloc_after.9 \
vm_page_alloc.9 vm_page_alloc_contig_domain.9 \
vm_page_alloc.9 vm_page_alloc_domain.9 \
vm_page_alloc.9 vm_page_alloc_domain_after.9 \
-   vm_page_alloc.9 vm_page_alloc_freelist.9 \
-   vm_page_alloc.9 vm_page_alloc_freelist_domain.9 \
vm_page_alloc.9 vm_page_alloc_noobj.9 \
vm_page_alloc.9 vm_page_alloc_noobj_contig.9 \
vm_page_alloc.9 vm_page_alloc_noobj_contig_domain.9 \
diff --git a/share/man/man9/vm_page_alloc.9 b/share/man/man9/vm_page_alloc.9
index de225e05d707..7d6cf1692bb1 100644
--- a/share/man/man9/vm_page_alloc.9
+++ b/share/man/man9/vm_page_alloc.9
@@ -28,7 +28,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 .\" DAMAGE.
 .\"
-.Dd November 11, 2021
+.Dd July 21, 2024
 .Dt VM_PAGE_ALLOC 9
 .Os
 .Sh NAME
@@ -87,17 +87,6 @@
 .Fa "vm_page_t mpred"
 .Fc
 .Ft vm_page_t
-.Fo vm_page_alloc_freelist
-.Fa "int freelist"
-.Fa "int req"
-.Fc
-.Ft vm_page_t
-.Fo vm_page_alloc_freelist_domain
-.Fa "int domain"
-.Fa "int freelist"
-.Fa "int req"
-.Fc
-.Ft vm_page_t
 .Fo vm_page_alloc_noobj
 .Fa "int req"
 .Fc
@@ -212,19 +201,6 @@ or
 will carry the machine-dependent encoding of the memory attribute.
 Additionally, the direct mapping of the page, if any, will be updated to
 reflect the requested memory attribute.
-.Pp
-The
-.Fn vm_page_alloc_freelist
-and
-.Fn vm_page_alloc_freelist_domain
-functions behave identically to
-.Fn vm_page_alloc_noobj
-and
-.Fn vm_page_alloc_noobj_domain ,
-respectively, except that a successful allocation will return a page from the
-specified physical memory freelist.
-These functions are not intended for use outside of the virtual memory
-subsystem and exist only to support the requirements of certain platforms.
 .Sh REQUEST FLAGS
 All page allocator functions accept a
 .Fa req
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index c9ac79330696..64413ba10bfa 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2406,11 +2406,10 @@ vm_page_alloc_contig_domain(vm_object_t object, 
vm_pindex_t pindex, int domain,
 
 /*
  * Allocate a physical page that is not intended to be inserted into a VM
- * object.  If the "freelist" parameter is not equal to VM_NFREELIST, then only
- * pages from the specified vm_phys freelist will be returned.
+ * object.
  */
-static __always_inline vm_page_t
-_vm_page_alloc_noobj_domain(int domain, const int freelist, int req)
+vm_page_t
+vm_page_alloc_noobj_domain(int domain, int req)
 {
struct vm_domain *vmd;
vm_page_t m;
@@ -2426,8 +2425,7 @@ _vm_page_alloc_noobj_domain(int domain, const int 
freelist, int req)
flags = (req & VM_ALLOC_NODUMP) != 0 ? PG_NODUMP : 0;
vmd = VM_DOMAIN(domain);
 again:
-   if (freelist == VM_NFREELIST &&
-   vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone != NULL) {
+   if (vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone != NULL) {
m = uma_zalloc(vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone,

git: 98dd639c94f7 - main - arm64 pmap: Eliminate an unnecessary conditional

2024-07-24 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=98dd639c94f716858ae29958f484729b1d2fd387

commit 98dd639c94f716858ae29958f484729b1d2fd387
Author: Alan Cox 
AuthorDate: 2024-07-24 22:32:32 +
Commit: Alan Cox 
CommitDate: 2024-07-25 06:46:47 +

arm64 pmap: Eliminate an unnecessary conditional

Eliminate an unnecessary test whether a pointer is non-NULL from
pmap_bti_same().

Reviewed by:  dougm
---
 sys/arm64/arm64/pmap.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 2540b5eaf4b9..59de6ef37f09 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -9396,8 +9396,7 @@ pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t 
eva, pt_entry_t *pte)
return (false);
rs = next_rs;
}
-   if (rs != NULL)
-   *pte |= ATTR_S1_GP;
+   *pte |= ATTR_S1_GP;
return (true);
 }
 



git: 5b8c01d13a09 - main - amd64 pmap: Optimize PKU lookups when creating superpage mappings

2024-07-25 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5b8c01d13a0970b11f47503fcd627d249a6e638a

commit 5b8c01d13a0970b11f47503fcd627d249a6e638a
Author: Alan Cox 
AuthorDate: 2024-07-25 06:57:53 +
Commit: Alan Cox 
CommitDate: 2024-07-26 05:38:46 +

amd64 pmap: Optimize PKU lookups when creating superpage mappings

Modify pmap_pkru_same() to update the prototype PTE at the same time as
checking the address range.  This eliminates the need for calling
pmap_pkru_get() in addition to pmap_pkru_same().  pmap_pkru_same() was
already doing most of the work of pmap_pkru_get().

Reviewed by:  kib
Differential Revision:  https://reviews.freebsd.org/D46135
---
 sys/amd64/amd64/pmap.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index dcf9b4f5a4f3..778d07689ff0 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -573,7 +573,8 @@ struct pmap_pkru_range {
 };
 
 static uma_zone_t pmap_pkru_ranges_zone;
-static bool pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+static bool pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+pt_entry_t *pte);
 static pt_entry_t pmap_pkru_get(pmap_t pmap, vm_offset_t va);
 static void pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
 static void *pkru_dup_range(void *ctx, void *data);
@@ -7071,11 +7072,9 @@ pmap_enter_largepage(pmap_t pmap, vm_offset_t va, 
pt_entry_t newpte, int flags,
PG_V = pmap_valid_bit(pmap);
 
 restart:
-   if (!pmap_pkru_same(pmap, va, va + pagesizes[psind]))
-   return (KERN_PROTECTION_FAILURE);
pten = newpte;
-   if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86)
-   pten |= pmap_pkru_get(pmap, va);
+   if (!pmap_pkru_same(pmap, va, va + pagesizes[psind], &pten))
+   return (KERN_PROTECTION_FAILURE);
 
if (psind == 2) {   /* 1G */
pml4e = pmap_pml4e(pmap, va);
@@ -7529,14 +7528,10 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t 
newpde, u_int flags,
 * and let vm_fault() cope.  Check after pde allocation, since
 * it could sleep.
 */
-   if (!pmap_pkru_same(pmap, va, va + NBPDR)) {
+   if (!pmap_pkru_same(pmap, va, va + NBPDR, &newpde)) {
pmap_abort_ptp(pmap, va, pdpg);
return (KERN_PROTECTION_FAILURE);
}
-   if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86) {
-   newpde &= ~X86_PG_PKU_MASK;
-   newpde |= pmap_pkru_get(pmap, va);
-   }
 
/*
 * If there are existing mappings, either abort or remove them.
@@ -11460,13 +11455,21 @@ pmap_pkru_deassign_all(pmap_t pmap)
rangeset_remove_all(&pmap->pm_pkru);
 }
 
+/*
+ * Returns true if the PKU setting is the same across the specified address
+ * range, and false otherwise.  When returning true, updates the referenced PTE
+ * to reflect the PKU setting.
+ */
 static bool
-pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t *pte)
 {
struct pmap_pkru_range *next_ppr, *ppr;
vm_offset_t va;
+   u_int keyidx;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+   KASSERT(pmap->pm_type != PT_X86 || (*pte & X86_PG_PKU_MASK) == 0,
+   ("pte %p has unexpected PKU %ld", pte, *pte & X86_PG_PKU_MASK));
if (pmap->pm_type != PT_X86 ||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 ||
sva >= VM_MAXUSER_ADDRESS)
@@ -11478,14 +11481,16 @@ pmap_pkru_same(pmap_t pmap, vm_offset_t sva, 
vm_offset_t eva)
return (ppr == NULL ||
ppr->pkru_rs_el.re_start >= eva);
}
+   keyidx = ppr->pkru_keyidx;
while ((va = ppr->pkru_rs_el.re_end) < eva) {
next_ppr = rangeset_next(&pmap->pm_pkru, va);
if (next_ppr == NULL ||
va != next_ppr->pkru_rs_el.re_start ||
-   ppr->pkru_keyidx != next_ppr->pkru_keyidx)
+   keyidx != next_ppr->pkru_keyidx)
return (false);
ppr = next_ppr;
}
+   *pte |= X86_PG_PKU(keyidx);
return (true);
 }
 
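The reworked pmap_pkru_same() walks the PKU rangeset once, both verifying that a single key covers the whole range and folding that key into the prototype PTE; a minimal sketch of that single-pass pattern over a sorted range list (hypothetical data structures, not the kernel rangeset API):

/*
 * Userspace sketch of the single-pass "same key across a range, and
 * record it" pattern (hypothetical data structures; not the kernel
 * rangeset API or the exact pmap_pkru_same() semantics).
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct range { uint64_t start, end; unsigned key; };

/* Sorted, non-overlapping ranges standing in for pmap->pm_pkru. */
static const struct range ranges[] = {
	{ 0x1000, 0x5000, 7 },
	{ 0x5000, 0x9000, 7 },
	{ 0x9000, 0xd000, 3 },
};
#define NRANGES	(sizeof(ranges) / sizeof(ranges[0]))

static bool
range_same_key(uint64_t sva, uint64_t eva, unsigned *keyp)
{
	size_t i;

	for (i = 0; i < NRANGES; i++) {
		if (ranges[i].start <= sva && sva < ranges[i].end)
			break;
	}
	if (i == NRANGES)
		return (false);
	/* Walk forward; every piece must abut the last and share its key. */
	while (ranges[i].end < eva) {
		if (i + 1 == NRANGES ||
		    ranges[i + 1].start != ranges[i].end ||
		    ranges[i + 1].key != ranges[i].key)
			return (false);
		i++;
	}
	*keyp = ranges[i].key;	/* the same walk also yields the key */
	return (true);
}

int
main(void)
{
	unsigned key;

	if (range_same_key(0x2000, 0x8000, &key))
		printf("uniform key %u\n", key);
	if (!range_same_key(0x2000, 0xc000, &key))
		printf("keys differ across the range\n");
	return (0);
}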



git: f6ed52c1f010 - main - vm: Stop reducing vm_pageout_page_count at startup

2024-08-02 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=f6ed52c1f010aca5083e9c4dd3d0ad15aa8230a2

commit f6ed52c1f010aca5083e9c4dd3d0ad15aa8230a2
Author: Alan Cox 
AuthorDate: 2024-08-01 17:14:00 +
Commit: Alan Cox 
CommitDate: 2024-08-02 19:41:36 +

vm: Stop reducing vm_pageout_page_count at startup

Attempting to reduce vm_pageout_page_count at startup when the machine
has less than 8MB of physical memory is pointless, since we haven't run
on machines with so little memory in ages.

Reviewed by:  kib, markj
Differential Revision:  https://reviews.freebsd.org/D46206
---
 sys/vm/vm_pageout.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 8ad4bf4d3ab4..742e0afbc690 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -2305,9 +2305,6 @@ vm_pageout_init(void)
/*
 * Initialize some paging parameters.
 */
-   if (vm_cnt.v_page_count < 2000)
-   vm_pageout_page_count = 8;
-
freecount = 0;
for (i = 0; i < vm_ndomains; i++) {
struct vm_domain *vmd;



git: f076dd3ef02d - main - imgact_elf: Optimize pagesizes[] loop

2024-08-02 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=f076dd3ef02ddf7799eeaab8d405ee9d845e8dc6

commit f076dd3ef02ddf7799eeaab8d405ee9d845e8dc6
Author: Alan Cox 
AuthorDate: 2024-07-15 06:02:33 +
Commit: Alan Cox 
CommitDate: 2024-08-02 23:26:11 +

imgact_elf: Optimize pagesizes[] loop

Except for elements whose value is zero, the elements of pagesizes[] are
always sorted in increasing order, so once a loop starting from the end
of the array has found a non-zero element, it has found the largest
valued element and can stop iterating.

Reviewed by:  kib
Differential Revision:  https://reviews.freebsd.org/D46215
---
 sys/kern/imgact_elf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index a623a63e9c2e..28ffdd03dd6d 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -1158,8 +1158,10 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params 
*imgp)
maxalign = PAGE_SIZE;
maxsalign = PAGE_SIZE * 1024;
for (i = MAXPAGESIZES - 1; i > 0; i--) {
-   if (pagesizes[i] > maxsalign)
+   if (pagesizes[i] > maxsalign) {
maxsalign = pagesizes[i];
+   break;
+   }
}
 
mapsz = 0;

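Because the non-zero elements of pagesizes[] are ascending, a top-down scan can stop at the first element that exceeds the threshold: it is necessarily the largest. A tiny sketch of the resulting loop shape (illustrative values):

/*
 * Userspace sketch of the loop above (illustrative values).
 * pagesizes[] is ascending apart from trailing zeros, so the first
 * element found scanning downward that exceeds maxsalign is the
 * largest one, and the loop can break immediately.
 */
#include <stdio.h>

#define MAXPAGESIZES	4

int
main(void)
{
	unsigned long pagesizes[MAXPAGESIZES] =
	    { 4096, 65536, 2UL << 20, 1UL << 30 };
	unsigned long maxsalign = 4096UL * 1024;
	int i;

	for (i = MAXPAGESIZES - 1; i > 0; i--) {
		if (pagesizes[i] > maxsalign) {
			maxsalign = pagesizes[i];
			break;		/* largest qualifying size found */
		}
	}
	printf("maxsalign %lu\n", maxsalign);
	return (0);
}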


git: fa290859fa63 - main - vm: Assert that pagesizes[] is sorted

2024-08-04 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=fa290859fa63d65b5da3014038ae289c1b336700

commit fa290859fa63d65b5da3014038ae289c1b336700
Author: Alan Cox 
AuthorDate: 2024-08-03 20:18:16 +
Commit: Alan Cox 
CommitDate: 2024-08-04 19:53:15 +

vm: Assert that pagesizes[] is sorted

Ensure that pmap_init() properly initialized pagesizes[].  In part, we
are making this change to document the requirement that the non-zero
elements of pagesizes[] must be in ascending order.

Reviewed by:  kib, markj
---
 sys/vm/vm_init.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index 0fd13f73a180..a0d3651ba266 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -100,6 +100,24 @@ long physmem;
 static void vm_mem_init(void *);
 SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_FIRST, vm_mem_init, NULL);
 
+#ifdef INVARIANTS
+/*
+ * Ensure that pmap_init() correctly initialized pagesizes[].
+ */
+static void
+vm_check_pagesizes(void)
+{
+   int i;
+
+   KASSERT(pagesizes[0] == PAGE_SIZE, ("pagesizes[0] != PAGE_SIZE"));
+   for (i = 1; i < MAXPAGESIZES; i++) {
+   KASSERT((pagesizes[i - 1] != 0 &&
+   pagesizes[i - 1] < pagesizes[i]) || pagesizes[i] == 0,
+   ("pagesizes[%d ... %d] are misconfigured", i - 1, i));
+   }
+}
+#endif
+
 /*
  * vm_mem_init() initializes the virtual memory system.
  * This is done only by the first cpu up.
@@ -140,6 +158,10 @@ vm_mem_init(void *dummy)
kmem_init_zero_region();
pmap_init();
vm_pager_init();
+
+#ifdef INVARIANTS
+   vm_check_pagesizes();
+#endif
 }
 
 void



git: 841cf52595b6 - main - arm64 pmap: Add ATTR_CONTIGUOUS support [Part 2]

2024-04-09 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=841cf52595b6a6b98e266b63e54a7cf6fb6ca73e

commit 841cf52595b6a6b98e266b63e54a7cf6fb6ca73e
Author: Alan Cox 
AuthorDate: 2024-04-08 05:05:54 +
Commit: Alan Cox 
CommitDate: 2024-04-09 16:21:08 +

arm64 pmap: Add ATTR_CONTIGUOUS support [Part 2]

Create ATTR_CONTIGUOUS mappings in pmap_enter_object().  As a result,
when the base page size is 4 KB, the read-only data and text sections
of large (2 MB+) executables, e.g., clang, can be mapped using 64 KB
pages.  Similarly, when the base page size is 16 KB, the read-only
data section of large executables can be mapped using 2 MB pages.

Rename pmap_enter_2mpage().  Given that we have grown support for 16 KB
base pages, we should no longer include page sizes that may vary, e.g.,
2mpage, in pmap function names.  Requested by: andrew

Co-authored-by: Eliot Solomon 
Differential Revision:  https://reviews.freebsd.org/D44575
---
 sys/arm64/arm64/pmap.c | 252 +++--
 1 file changed, 245 insertions(+), 7 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 258aa141653b..ea7ff18971e4 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -472,6 +472,8 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, 
vm_offset_t va,
 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
 u_int flags, vm_page_t m, struct rwlock **lockp);
+static int pmap_enter_l3c(pmap_t pmap, vm_offset_t va, pt_entry_t l3e, u_int 
flags,
+vm_page_t m, vm_page_t *ml3p, struct rwlock **lockp);
 static bool pmap_every_pte_zero(vm_paddr_t pa);
 static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
 bool all_l3e_AF_set);
@@ -5177,13 +5179,13 @@ out:
 }
 
 /*
- * Tries to create a read- and/or execute-only 2MB page mapping.  Returns
+ * Tries to create a read- and/or execute-only L2 page mapping.  Returns
  * KERN_SUCCESS if the mapping was created.  Otherwise, returns an error
  * value.  See pmap_enter_l2() for the possible error values when "no sleep",
  * "no replace", and "no reclaim" are specified.
  */
 static int
-pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+pmap_enter_l2_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 struct rwlock **lockp)
 {
pd_entry_t new_l2;
@@ -5233,13 +5235,13 @@ pmap_every_pte_zero(vm_paddr_t pa)
 }
 
 /*
- * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
+ * Tries to create the specified L2 page mapping.  Returns KERN_SUCCESS if
  * the mapping was created, and one of KERN_FAILURE, KERN_NO_SPACE, or
  * KERN_RESOURCE_SHORTAGE otherwise.  Returns KERN_FAILURE if
- * PMAP_ENTER_NOREPLACE was specified and a 4KB page mapping already exists
- * within the 2MB virtual address range starting at the specified virtual
+ * PMAP_ENTER_NOREPLACE was specified and a base page mapping already exists
+ * within the L2 virtual address range starting at the specified virtual
  * address.  Returns KERN_NO_SPACE if PMAP_ENTER_NOREPLACE was specified and a
- * 2MB page mapping already exists at the specified virtual address.  Returns
+ * L2 page mapping already exists at the specified virtual address.  Returns
  * KERN_RESOURCE_SHORTAGE if either (1) PMAP_ENTER_NOSLEEP was specified and a
  * page table page allocation failed or (2) PMAP_ENTER_NORECLAIM was specified
  * and a PV entry allocation failed.
@@ -5405,6 +5407,235 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
new_l2, u_int flags,
return (KERN_SUCCESS);
 }
 
+/*
+ * Tries to create a read- and/or execute-only L3C page mapping.  Returns
+ * KERN_SUCCESS if the mapping was created.  Otherwise, returns an error
+ * value.
+ */
+static int
+pmap_enter_l3c_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *ml3p,
+vm_prot_t prot, struct rwlock **lockp)
+{
+   pt_entry_t l3e;
+
+   PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+   PMAP_ASSERT_STAGE1(pmap);
+   KASSERT(ADDR_IS_CANONICAL(va),
+   ("%s: Address not in canonical form: %lx", __func__, va));
+
+   l3e = PHYS_TO_PTE(VM_PAGE_TO_PHYS(m)) | ATTR_DEFAULT |
+   ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) |
+   ATTR_CONTIGUOUS | L3_PAGE;
+   l3e |= pmap_pte_bti(pmap, va);
+   if ((m->oflags & VPO_UNMANAGED) == 0) {
+   l3e |= ATTR_SW_MANAGED;
+   l3e &= ~ATTR_AF;
+   }
+   if ((prot & VM_PROT_EXECUTE) == 0 ||
+   m->md.pv_memattr == VM_MEMATTR_DEVICE)
+   l3e |= ATTR_S1_XN;
+   if (!ADDR_IS_KERNEL(va))
+   l3e |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
+   else
+   l3e |= ATTR_S1_UXN;
+   if (pmap != k

git: a803837cec6e - main - arm64 pmap: Add ATTR_CONTIGUOUS support [Part 3]

2024-05-07 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=a803837cec6e17e04849d59afac7b6431c70cb93

commit a803837cec6e17e04849d59afac7b6431c70cb93
Author: Alan Cox 
AuthorDate: 2024-04-17 16:39:46 +
Commit: Alan Cox 
CommitDate: 2024-05-08 02:31:14 +

arm64 pmap: Add ATTR_CONTIGUOUS support [Part 3]

Introduce L3C promotion of base page mappings.  When the base page size
is 4KB, use ATTR_CONTIGUOUS to promote 16 aligned, contiguous base page
mappings to a 64KB mapping.  Alternatively, when the base page size is
16KB, use ATTR_CONTIGUOUS to promote 128 aligned, contiguous base page
mappings to a 2MB mapping.

Given the frequency of L3C counter updates, switch to per-CPU counters
to avoid cache line ping ponging.

Revise the L3C counter descriptions to reflect the fact that the size
of an L3C mapping varies depending on the base page size.

Co-authored-by: Eliot Solomon 
Reviewed by:  markj
Differential Revision:  https://reviews.freebsd.org/D44983
---
 sys/arm64/arm64/pmap.c | 168 -
 1 file changed, 154 insertions(+), 14 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index ea7ff18971e4..b1a85befa4e1 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1684,15 +1684,23 @@ SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, 
CTLFLAG_RD,
 &pmap_l2_promotions, 0, "2MB page promotions");
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, l3c, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
-"64KB page mapping counters");
+"L3C (64KB/2MB) page mapping counters");
 
-static u_long pmap_l3c_demotions;
-SYSCTL_ULONG(_vm_pmap_l3c, OID_AUTO, demotions, CTLFLAG_RD,
-&pmap_l3c_demotions, 0, "64KB page demotions");
+static COUNTER_U64_DEFINE_EARLY(pmap_l3c_demotions);
+SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, demotions, CTLFLAG_RD,
+&pmap_l3c_demotions, "L3C (64KB/2MB) page demotions");
 
-static u_long pmap_l3c_mappings;
-SYSCTL_ULONG(_vm_pmap_l3c, OID_AUTO, mappings, CTLFLAG_RD,
-&pmap_l3c_mappings, 0, "64KB page mappings");
+static COUNTER_U64_DEFINE_EARLY(pmap_l3c_mappings);
+SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, mappings, CTLFLAG_RD,
+&pmap_l3c_mappings, "L3C (64KB/2MB) page mappings");
+
+static COUNTER_U64_DEFINE_EARLY(pmap_l3c_p_failures);
+SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, p_failures, CTLFLAG_RD,
+&pmap_l3c_p_failures, "L3C (64KB/2MB) page promotion failures");
+
+static COUNTER_U64_DEFINE_EARLY(pmap_l3c_promotions);
+SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, promotions, CTLFLAG_RD,
+&pmap_l3c_promotions, "L3C (64KB/2MB) page promotions");
 
 /*
  * If the given value for "final_only" is false, then any cached intermediate-
@@ -4547,7 +4555,7 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *ptep, 
pd_entry_t newpte,
 * be cached, so we invalidate intermediate entries as well as final
 * entries.
 */
-   pmap_s1_invalidate_range(pmap, va, va + size, false);
+   pmap_s1_invalidate_range(pmap, va, va + size, size == L3C_SIZE);
 
/* Create the new mapping */
for (lip = ptep; lip < ptep_end; lip++) {
@@ -4749,6 +4757,131 @@ setl3:
pmap);
return (true);
 }
+
+/*
+ * Tries to promote an aligned, contiguous set of base page mappings to a
+ * single L3C page mapping.  For promotion to occur, two conditions must be
+ * met: (1) the base page mappings must map aligned, contiguous physical
+ * memory and (2) the base page mappings must have identical characteristics
+ * except for the accessed flag.
+ */
+static bool
+pmap_promote_l3c(pmap_t pmap, pd_entry_t *l3p, vm_offset_t va)
+{
+   pd_entry_t all_l3e_AF, firstl3c, *l3, oldl3, pa;
+
+   PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+   /*
+* Currently, this function only supports promotion on stage 1 pmaps
+* because it tests stage 1 specific fields and performs a break-
+* before-make sequence that is incorrect for stage 2 pmaps.
+*/
+   if (pmap->pm_stage != PM_STAGE1 || !pmap_ps_enabled(pmap))
+   return (false);
+
+   /*
+* Compute the address of the first L3 entry in the superpage
+* candidate.
+*/
+   l3p = (pt_entry_t *)((uintptr_t)l3p & ~((L3C_ENTRIES *
+   sizeof(pt_entry_t)) - 1));
+
+   firstl3c = pmap_load(l3p);
+
+   /*
+* Examine the first L3 entry. Abort if this L3E is ineligible for
+* promotion...
+*/
+   if ((firstl3c & ATTR_SW_NO_PROMOTE) != 0)
+   return (false);
+   /* ...is not properly aligned... */
+   if ((PTE_TO_PHYS(firstl3c) & L3C_OFFSET) != 0 ||
+   (firstl3c & ATTR_DESCR_MASK) != L3_PAGE) { /* ...or is invalid. */
+   count

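The two promotion preconditions named in the commit message, aligned and contiguous physical memory plus identical attributes apart from the accessed flag, can be sketched in userspace as follows (simplified, hypothetical PTE encoding; 4KB base page configuration assumed):

/*
 * Userspace sketch of the L3C promotion test described above
 * (hypothetical, simplified PTE encoding; not the real arm64 layout).
 * Sixteen 4KB mappings are promotable to one 64KB L3C mapping when
 * their physical pages are aligned and contiguous and all attributes
 * other than the accessed flag match.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define L3C_ENTRIES	16
#define ATTR_AF		(1ULL << 10)		/* accessed flag */
#define PTE_PA(pte)	((pte) & ~0xfffULL)	/* physical address bits */
#define PTE_ATTRS(pte)	((pte) & 0xfffULL)	/* low attribute bits */

static bool
promotable_l3c(const uint64_t pte[L3C_ENTRIES])
{
	uint64_t first = pte[0];
	int i;

	/* The first physical page must sit on a 64KB L3C boundary. */
	if ((PTE_PA(first) & (L3C_ENTRIES * 0x1000ULL - 1)) != 0)
		return (false);
	for (i = 1; i < L3C_ENTRIES; i++) {
		if (PTE_PA(pte[i]) != PTE_PA(first) + i * 0x1000ULL)
			return (false);		/* not contiguous */
		if ((PTE_ATTRS(pte[i]) | ATTR_AF) !=
		    (PTE_ATTRS(first) | ATTR_AF))
			return (false);		/* attributes differ */
	}
	return (true);
}

int
main(void)
{
	uint64_t pte[L3C_ENTRIES];
	int i;

	for (i = 0; i < L3C_ENTRIES; i++)
		pte[i] = (0x40000000ULL + i * 0x1000ULL) | 0x3c3 |
		    ((i & 1) ? ATTR_AF : 0);	/* AF is allowed to differ */
	printf("promotable: %s\n", promotable_l3c(pte) ? "yes" : "no");
	return (0);
}
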
git: 94b09d388b81 - main - arm64: map kernel using large pages when page size is 16K

2024-05-12 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=94b09d388b81eb724769e506cdf0f51bba9b73fb

commit 94b09d388b81eb724769e506cdf0f51bba9b73fb
Author: Alan Cox 
AuthorDate: 2024-05-11 06:09:39 +
Commit: Alan Cox 
CommitDate: 2024-05-12 23:22:38 +

arm64: map kernel using large pages when page size is 16K

When the page size is 16K, use ATTR_CONTIGUOUS to map the kernel code
and data sections using 2M pages.  Previously, they were mapped using
16K pages.

Reviewed by:  markj
Tested by:  markj
Differential Revision:  https://reviews.freebsd.org/D45162
---
 sys/arm64/arm64/locore.S | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index f53cd365de55..fffebe8f2b02 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -516,11 +516,10 @@ booti_no_fdt:
 common:
 #if PAGE_SIZE != PAGE_SIZE_4K
/*
-* Create L3 pages. The kernel will be loaded at a 2M aligned
-* address, however L2 blocks are too large when the page size is
-* not 4k to map the kernel with such an aligned address. However,
-* when the page size is larger than 4k, L2 blocks are too large to
-* map the kernel with such an alignment.
+* Create L3 and L3C pages. The kernel will be loaded at a 2M aligned
+* address, enabling the creation of L3C pages. However, when the page
+* size is larger than 4k, L2 blocks are too large to map the kernel
+* with 2M alignment.
 */
 #definePTE_SHIFT   L3_SHIFT
 #defineBUILD_PTE_FUNC  build_l3_page_pagetable
@@ -784,13 +783,17 @@ LENTRY(link_l2_pagetable)
 LEND(link_l2_pagetable)
 
 /*
- * Builds count level 3 page table entries
+ * Builds count level 3 page table entries. Uses ATTR_CONTIGUOUS to create
+ * large page (L3C) mappings when the current VA and remaining count allow
+ * it.
  *  x6  = L3 table
  *  x7  = Block attributes
  *  x8  = VA start
  *  x9  = PA start (trashed)
  *  x10 = Entry count (trashed)
  *  x11, x12 and x13 are trashed
+ *
+ * VA start (x8) modulo L3C_SIZE must equal PA start (x9) modulo L3C_SIZE.
  */
 LENTRY(build_l3_page_pagetable)
/*
@@ -811,8 +814,17 @@ LENTRY(build_l3_page_pagetable)
/* Only use the output address bits */
lsr x9, x9, #L3_SHIFT
 
+   /* Check if an ATTR_CONTIGUOUS mapping is possible */
+1: tst x11, #(L3C_ENTRIES - 1)
+   b.ne2f
+   cmp x10, #L3C_ENTRIES
+   b.lo3f
+   orr x12, x12, #(ATTR_CONTIGUOUS)
+   b   2f
+3: and x12, x12, #(~ATTR_CONTIGUOUS)
+
/* Set the physical address for this virtual address */
-1: orr x13, x12, x9, lsl #L3_SHIFT
+2: orr x13, x12, x9, lsl #L3_SHIFT
 
/* Store the entry */
str x13, [x6, x11, lsl #3]

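The per-entry decision made by the new assembly, set ATTR_CONTIGUOUS only when the table index is L3C-aligned and at least a full L3C run of entries remains, and keep that decision for the entries that follow, can be rendered in C as a rough sketch (illustrative; it mirrors the x10/x11 logic rather than translating the assembly exactly):

/*
 * Userspace sketch of the ATTR_CONTIGUOUS decision in
 * build_l3_page_pagetable (illustrative; mirrors the logic rather
 * than translating the assembly).  The decision is taken at each
 * L3C-aligned index and carried for the entries that follow it.
 */
#include <stdbool.h>
#include <stdio.h>

#define L3C_ENTRIES	128	/* 16KB granule: 128 * 16KB = 2MB */

static bool contig;

static bool
use_contiguous(unsigned index, unsigned remaining)
{
	if ((index & (L3C_ENTRIES - 1)) == 0)
		contig = (remaining >= L3C_ENTRIES);
	return (contig);
}

int
main(void)
{
	unsigned i, count = 300, ncontig = 0;

	for (i = 0; i < count; i++) {
		if (use_contiguous(i, count - i))
			ncontig++;
	}
	printf("%u of %u entries get ATTR_CONTIGUOUS\n", ncontig, count);
	return (0);
}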


git: 4f77144279f2 - main - arm64 pmap: eliminate a redundant variable

2024-05-19 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4f77144279f210ce65d77c13470c6363c3ce3c57

commit 4f77144279f210ce65d77c13470c6363c3ce3c57
Author: Alan Cox 
AuthorDate: 2024-05-19 19:22:53 +
Commit: Alan Cox 
CommitDate: 2024-05-19 19:33:19 +

arm64 pmap: eliminate a redundant variable

Moreover, if we attempt an L2 promotion on the kernel pmap from
pmap_enter_quick_locked(), this change eliminates the recomputation of
the L2 entry's address.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index a6056a5edfc2..269513589d78 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -5848,7 +5848,6 @@ static vm_page_t
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 {
-   pd_entry_t *pde;
pt_entry_t *l1, *l2, *l3, l3_val;
vm_paddr_t pa;
int lvl;
@@ -5913,13 +5912,13 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
vm_page_t m,
l3 = &l3[pmap_l3_index(va)];
} else {
mpte = NULL;
-   pde = pmap_pde(kernel_pmap, va, &lvl);
-   KASSERT(pde != NULL,
+   l2 = pmap_pde(kernel_pmap, va, &lvl);
+   KASSERT(l2 != NULL,
("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
 va));
KASSERT(lvl == 2,
("pmap_enter_quick_locked: Invalid level %d", lvl));
-   l3 = pmap_l2_to_l3(pde, va);
+   l3 = pmap_l2_to_l3(l2, va);
}
 
/*



git: 9fc5e3fb39ca - main - arm64: set ATTR_CONTIGUOUS on the DMAP's L2 blocks

2024-05-22 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=9fc5e3fb39ca5b2239066b750bea2ce5775bd79b

commit 9fc5e3fb39ca5b2239066b750bea2ce5775bd79b
Author: Alan Cox 
AuthorDate: 2024-05-13 06:39:28 +
Commit: Alan Cox 
CommitDate: 2024-05-23 03:09:43 +

arm64: set ATTR_CONTIGUOUS on the DMAP's L2 blocks

On systems configured with 16KB pages, this change creates 1GB page
mappings in the direct map where possible.  Previously, the largest page
size that was used to implement the direct map was 32MB.  Similarly, on
systems configured with 4KB pages, this change creates 32MB page
mappings, instead of 2MB, in the direct map where 1GB is too large.

Implement demotion on L2C (32MB/1GB) page mappings within the DMAP.

Update sysctl vm.pmap.kernel_maps to report on L2C page mappings.

Reviewed by:  markj
Tested by:  gallatin, Eliot Solomon 
Differential Revision:  https://reviews.freebsd.org/D45224
---
 sys/arm64/arm64/pmap.c  | 264 ++--
 sys/arm64/include/pte.h |   5 +
 2 files changed, 237 insertions(+), 32 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 269513589d78..2ce313de36cf 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -429,7 +429,6 @@ void (*pmap_stage2_invalidate_all)(uint64_t);
 #defineTLBI_VA_SHIFT   12
 #defineTLBI_VA_MASK((1ul << 44) - 1)
 #defineTLBI_VA(addr)   (((addr) >> TLBI_VA_SHIFT) & 
TLBI_VA_MASK)
-#defineTLBI_VA_L3_INCR (L3_SIZE >> TLBI_VA_SHIFT)
 
 static int __read_frequently superpages_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
@@ -470,6 +469,7 @@ static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t 
*l1, vm_offset_t va);
 static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
 vm_offset_t va, struct rwlock **lockp);
 static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
+static bool pmap_demote_l2c(pmap_t pmap, pt_entry_t *l2p, vm_offset_t va);
 static bool pmap_demote_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
@@ -1108,6 +1108,7 @@ pmap_bootstrap_l2_table(struct pmap_bootstrap_state 
*state)
 static void
 pmap_bootstrap_l2_block(struct pmap_bootstrap_state *state, int i)
 {
+   pt_entry_t contig;
u_int l2_slot;
bool first;
 
@@ -1118,7 +1119,7 @@ pmap_bootstrap_l2_block(struct pmap_bootstrap_state 
*state, int i)
pmap_bootstrap_l1_table(state);
 
MPASS((state->va & L2_OFFSET) == 0);
-   for (first = true;
+   for (first = true, contig = 0;
state->va < DMAP_MAX_ADDRESS &&
(physmap[i + 1] - state->pa) >= L2_SIZE;
state->va += L2_SIZE, state->pa += L2_SIZE) {
@@ -1129,13 +1130,27 @@ pmap_bootstrap_l2_block(struct pmap_bootstrap_state 
*state, int i)
if (!first && (state->pa & L1_OFFSET) == 0)
break;
 
+   /*
+* If we have an aligned, contiguous chunk of L2C_ENTRIES
+* L2 blocks, set the contiguous bit within each PTE so that
+* the chunk can be cached using only one TLB entry.
+*/
+   if ((state->pa & L2C_OFFSET) == 0) {
+   if (state->va + L2C_SIZE < DMAP_MAX_ADDRESS &&
+   physmap[i + 1] - state->pa >= L2C_SIZE) {
+   contig = ATTR_CONTIGUOUS;
+   } else {
+   contig = 0;
+   }
+   }
+
first = false;
l2_slot = pmap_l2_index(state->va);
MPASS((state->pa & L2_OFFSET) == 0);
MPASS(state->l2[l2_slot] == 0);
pmap_store(&state->l2[l2_slot], PHYS_TO_PTE(state->pa) |
ATTR_DEFAULT | ATTR_S1_XN | ATTR_KERN_GP |
-   ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | L2_BLOCK);
+   ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | contig | L2_BLOCK);
}
MPASS(state->va == (state->pa - dmap_phys_base + DMAP_MIN_ADDRESS));
 }
@@ -1667,6 +1682,20 @@ pmap_init(void)
vm_initialized = 1;
 }
 
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, l1, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+"L1 (1GB/64GB) page mapping counters");
+
+static COUNTER_U64_DEFINE_EARLY(pmap_l1_demotions);
+SYSCTL_COUNTER_U64(_vm_pmap_l1, OID_AUTO, demotions, CTLFLAG_RD,
+&pmap_l1_demotions, "L1 (1GB/64GB) page demotions");
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2c, CTLFLAG_RD 

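A small sketch of the L2C arithmetic behind the direct-map change (illustrative only): the contiguous hint groups 16 L2 blocks on a 4KB granule and 32 L2 blocks on a 16KB granule.

/*
 * Userspace sketch of the L2C size arithmetic described above
 * (illustrative only).  ATTR_CONTIGUOUS at level 2 covers 16 L2
 * blocks on a 4KB granule (16 * 2MB = 32MB) and 32 L2 blocks on a
 * 16KB granule (32 * 32MB = 1GB), which is why the direct map can
 * now use 32MB and 1GB mappings respectively.
 */
#include <stdio.h>

int
main(void)
{
	unsigned long l2_4k = 2UL << 20;	/* 2MB L2 block, 4KB granule */
	unsigned long l2_16k = 32UL << 20;	/* 32MB L2 block, 16KB granule */

	printf("4KB granule:  L2C = %lu MB\n", (16 * l2_4k) >> 20);
	printf("16KB granule: L2C = %lu MB\n", (32 * l2_16k) >> 20);
	return (0);
}
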
git: 3dc2a8848986 - main - arm64 pmap: Convert panic()s to KASSERT()s

2024-05-31 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3dc2a8848986df2c10ae7df4ce87a1538f549a85

commit 3dc2a8848986df2c10ae7df4ce87a1538f549a85
Author: Alan Cox 
AuthorDate: 2024-05-31 17:22:14 +
Commit: Alan Cox 
CommitDate: 2024-05-31 21:54:27 +

arm64 pmap: Convert panic()s to KASSERT()s

There is no reason for the ATTR_SW_NO_PROMOTE checks in
pmap_update_{entry,strided}() to be panic()s instead of KASSERT()s.

Requested by:   markj
Reviewed by:  markj
Differential Revision:  https://reviews.freebsd.org/D45424
---
 sys/arm64/arm64/pmap.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index cd7837e58380..aaba6ca189a1 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -4565,9 +4565,8 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *ptep, 
pd_entry_t newpte,
register_t intr;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
-   if ((newpte & ATTR_SW_NO_PROMOTE) != 0)
-   panic("%s: Updating non-promote pte", __func__);
+   KASSERT((newpte & ATTR_SW_NO_PROMOTE) == 0,
+   ("%s: Updating non-promote pte", __func__));
 
/*
 * Ensure we don't get switched out with the page table in an
@@ -4608,9 +4607,8 @@ pmap_update_strided(pmap_t pmap, pd_entry_t *ptep, 
pd_entry_t *ptep_end,
register_t intr;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
-   if ((newpte & ATTR_SW_NO_PROMOTE) != 0)
-   panic("%s: Updating non-promote pte", __func__);
+   KASSERT((newpte & ATTR_SW_NO_PROMOTE) == 0,
+   ("%s: Updating non-promote pte", __func__));
 
/*
 * Ensure we don't get switched out with the page table in an



git: f1d73aacdc47 - main - pmap: Skip some superpage promotion attempts that will fail

2024-06-03 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=f1d73aacdc47529310e2302094685295c032e28f

commit f1d73aacdc47529310e2302094685295c032e28f
Author: Alan Cox 
AuthorDate: 2024-06-02 08:56:47 +
Commit: Alan Cox 
CommitDate: 2024-06-04 05:38:05 +

pmap: Skip some superpage promotion attempts that will fail

Implement a simple heuristic to skip pointless promotion attempts by
pmap_enter_quick_locked() and moea64_enter().  Specifically, when
vm_fault() calls pmap_enter_quick() to map neighboring pages at the end
of a copy-on-write fault, there is no point in attempting promotion in
pmap_enter_quick_locked() and moea64_enter().  Promotion will fail
because the base pages have differing protection.

Reviewed by:  kib
Differential Revision:  https://reviews.freebsd.org/D45431
MFC after:  1 week
---
 sys/amd64/amd64/pmap.c  |  3 ++-
 sys/arm64/arm64/pmap.c  |  3 ++-
 sys/i386/i386/pmap.c|  3 ++-
 sys/powerpc/aim/mmu_oea64.c |  9 +++--
 sys/riscv/riscv/pmap.c  |  3 ++-
 sys/vm/vm.h |  1 +
 sys/vm/vm_fault.c   | 11 ++-
 7 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 8105c9d92478..2f3119aede67 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -7818,7 +7818,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
vm_page_t m,
 * If both the PTP and the reservation are fully populated, then
 * attempt promotion.
 */
-   if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
+   if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
+   (mpte == NULL || mpte->ref_count == NPTEPG) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
if (pde == NULL)
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index aaba6ca189a1..b6bc113ba8a4 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -6052,7 +6052,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
vm_page_t m,
 * If both the PTP and the reservation are fully populated, then
 * attempt promotion.
 */
-   if ((mpte == NULL || mpte->ref_count == NL3PG) &&
+   if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
+   (mpte == NULL || mpte->ref_count == NL3PG) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
if (l2 == NULL)
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 40d8ceaf42b9..5808c31a99af 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -4250,7 +4250,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
vm_page_t m,
 * If both the PTP and the reservation are fully populated, then
 * attempt promotion.
 */
-   if ((mpte == NULL || mpte->ref_count == NPTEPG) &&
+   if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
+   (mpte == NULL || mpte->ref_count == NPTEPG) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
if (pde == NULL)
diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c
index 391f90bb04eb..273dc38214e2 100644
--- a/sys/powerpc/aim/mmu_oea64.c
+++ b/sys/powerpc/aim/mmu_oea64.c
@@ -1755,10 +1755,14 @@ out:
 * If the VA of the entered page is not aligned with its PA,
 * don't try page promotion as it is not possible.
 * This reduces the number of promotion failures dramatically.
+*
+* Ignore VM_PROT_NO_PROMOTE unless PMAP_ENTER_QUICK_LOCKED.
 */
if (moea64_ps_enabled(pmap) && pmap != kernel_pmap && pvo != NULL &&
(pvo->pvo_vaddr & PVO_MANAGED) != 0 &&
(va & HPT_SP_MASK) == (pa & HPT_SP_MASK) &&
+   ((prot & VM_PROT_NO_PROMOTE) == 0 ||
+   (flags & PMAP_ENTER_QUICK_LOCKED) == 0) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0)
moea64_sp_promote(pmap, va, m);
@@ -1850,8 +1854,9 @@ moea64_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m,
 vm_prot_t prot)
 {
 
-   moea64_enter(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
-   PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0);
+   moea64_enter(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE |
+   VM_PROT_NO_PROMOTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED,
+   0);
 }
 
 vm_paddr_t
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 1e507f62696e..e8504bcb0f59 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -3519,7 +3519,8 @@ pmap_ente

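The heuristic can be sketched in userspace as a simple hint check (hypothetical and simplified; the flag value and function signatures below are illustrative, not the kernel interfaces):

/*
 * Userspace sketch of the VM_PROT_NO_PROMOTE hint (simplified; the
 * flag value and signatures are illustrative, not the kernel
 * interfaces).  The fault handler knows that the neighboring pages it
 * prefaults at the end of a copy-on-write fault have different
 * protection from the faulted page, so superpage promotion cannot
 * succeed and the attempt is skipped outright.
 */
#include <stdbool.h>
#include <stdio.h>

#define VM_PROT_READ		0x1
#define VM_PROT_NO_PROMOTE	0x8	/* advisory: promotion will fail */

static void
enter_quick(unsigned prot, bool ptp_full)
{
	if ((prot & VM_PROT_NO_PROMOTE) == 0 && ptp_full)
		printf("attempting superpage promotion\n");
	else
		printf("skipping promotion attempt\n");
}

int
main(void)
{
	/* Neighbor pages mapped at the end of a COW fault: skip. */
	enter_quick(VM_PROT_READ | VM_PROT_NO_PROMOTE, true);
	/* Ordinary prefault path: try promotion when the PTP is full. */
	enter_quick(VM_PROT_READ, true);
	return (0);
}
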
git: 41dfea24eec2 - main - arm64 pmap: Enable L3C promotions by pmap_enter_quick()

2024-06-04 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=41dfea24eec242e1e083e2a879483a7c05c7e2ff

commit 41dfea24eec242e1e083e2a879483a7c05c7e2ff
Author: Alan Cox 
AuthorDate: 2024-06-01 18:17:52 +
Commit: Alan Cox 
CommitDate: 2024-06-05 04:25:51 +

arm64 pmap: Enable L3C promotions by pmap_enter_quick()

More precisely, implement L3C (64KB/2MB, depending on base page size)
promotion in pmap_enter_quick()'s helper function,
pmap_enter_quick_locked().  At the same time, use the recently
introduced flag VM_PROT_NO_PROMOTE from pmap_enter_object() to
pmap_enter_quick_locked() to avoid L3C promotion attempts that will
fail.

Reviewed by:kib
Differential Revision:  https://reviews.freebsd.org/D45445
---
 sys/arm64/arm64/pmap.c | 29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 03d0a1cc6676..8ac7b8f6a135 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -5883,9 +5883,19 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, 
vm_offset_t end,
((rv = pmap_enter_l3c_rx(pmap, va, m, &mpte, prot,
&lock)) == KERN_SUCCESS || rv == KERN_NO_SPACE))
m = &m[L3C_ENTRIES - 1];
-   else
-   mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte,
-   &lock);
+   else {
+   /*
+* In general, if a superpage mapping were possible,
+* it would have been created above.  That said, if
+* start and end are not superpage aligned, then
+* promotion might be possible at the ends of [start,
+* end).  However, in practice, those promotion
+* attempts are so unlikely to succeed that they are
+* not worth trying.
+*/
+   mpte = pmap_enter_quick_locked(pmap, va, m, prot |
+   VM_PROT_NO_PROMOTE, mpte, &lock);
+   }
m = TAILQ_NEXT(m, listq);
}
if (lock != NULL)
@@ -6048,12 +6058,19 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
vm_page_t m,
 
 #if VM_NRESERVLEVEL > 0
/*
-* If both the PTP and the reservation are fully populated, then
-* attempt promotion.
+* First, attempt L3C promotion, if the virtual and physical addresses
+* are aligned with each other and an underlying reservation has the
+* neighboring L3 pages allocated.  The first condition is simply an
+* optimization that recognizes some eventual promotion failures early
+* at a lower run-time cost.  Then, attempt L2 promotion, if both the
+* PTP and the reservation are fully populated.
 */
if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
-   (mpte == NULL || mpte->ref_count == NL3PG) &&
+   (va & L3C_OFFSET) == (pa & L3C_OFFSET) &&
(m->flags & PG_FICTITIOUS) == 0 &&
+   vm_reserv_is_populated(m, L3C_ENTRIES) &&
+   pmap_promote_l3c(pmap, l3, va) &&
+   (mpte == NULL || mpte->ref_count == NL3PG) &&
vm_reserv_level_iffullpop(m) == 0) {
if (l2 == NULL)
l2 = pmap_pde(pmap, va, &lvl);
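
A small stand-alone sketch may help readers follow the two cheap gates added
above: the VM_PROT_NO_PROMOTE flag and the virtual/physical congruence test.
The constants and the helper name below are illustrative assumptions (a 4KB
base page and a 16-page L3C run), not the kernel's definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE          4096UL
#define L3C_ENTRIES        16UL
#define L3C_OFFSET         (L3C_ENTRIES * PAGE_SIZE - 1)  /* 64KB - 1 */
#define VM_PROT_NO_PROMOTE 0x08        /* assumed value, for illustration */

/*
 * Promotion is only worth attempting when the caller did not forbid it and
 * the virtual and physical addresses are congruent modulo the L3C size.
 */
static bool
l3c_promotion_worth_trying(int prot, uint64_t va, uint64_t pa)
{
        if ((prot & VM_PROT_NO_PROMOTE) != 0)
                return (false);
        return ((va & L3C_OFFSET) == (pa & L3C_OFFSET));
}

int
main(void)
{
        /* Congruent case: both addresses begin a 64KB-aligned run. */
        printf("%d\n", l3c_promotion_worth_trying(0, 0x10000, 0x250000));
        /* Incongruent case: promotion cannot succeed, so fail early. */
        printf("%d\n", l3c_promotion_worth_trying(0, 0x11000, 0x250000));
        return (0);
}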



git: 60847070f908 - main - vm: Eliminate a redundant call to vm_reserv_break_all()

2024-06-05 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=60847070f908c7c5ebb2ea4c851f8b98680fd01a

commit 60847070f908c7c5ebb2ea4c851f8b98680fd01a
Author: Alan Cox 
AuthorDate: 2024-06-05 06:40:20 +
Commit: Alan Cox 
CommitDate: 2024-06-05 17:39:47 +

vm: Eliminate a redundant call to vm_reserv_break_all()

When vm_object_collapse() was changed in commit 98087a0 to call
vm_object_terminate(), rather than destroying the object directly, its
call to vm_reserv_break_all() should have been removed, as
vm_object_terminate() calls vm_reserv_break_all().

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D45495
---
 sys/vm/vm_object.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 905df5454355..0af4402938ba 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1953,14 +1953,6 @@ vm_object_collapse(vm_object_t object)
 */
vm_object_collapse_scan(object);
 
-#if VM_NRESERVLEVEL > 0
-   /*
-* Break any reservations from backing_object.
-*/
-   if (__predict_false(!LIST_EMPTY(&backing_object->rvq)))
-   vm_reserv_break_all(backing_object);
-#endif
-
/*
 * Move the pager from backing_object to object.
 *



git: 9fabf97682ce - main - arm64: fix free queue and reservation configuration for 16KB pages

2024-03-24 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=9fabf97682ce494865c8b26c218f2d00a36c99ea

commit 9fabf97682ce494865c8b26c218f2d00a36c99ea
Author: Eliot Solomon 
AuthorDate: 2023-11-18 21:13:21 +
Commit: Alan Cox 
CommitDate: 2024-03-24 17:22:20 +

arm64: fix free queue and reservation configuration for 16KB pages

Correctly configure the free page queues and the reservation size when
the base page size is 16KB.  In particular, the reservation size was
less than the L2 Block size, making L2 promotions and mappings all but
impossible.

Reviewed by:markj
Tested by:  gallatin
Differential Revision:  https://reviews.freebsd.org/D42737
---
 sys/arm64/arm64/copyinout.S |  1 +
 sys/arm64/include/vmparam.h | 18 +-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/sys/arm64/arm64/copyinout.S b/sys/arm64/arm64/copyinout.S
index 005fa61bfe82..23f56ae85daa 100644
--- a/sys/arm64/arm64/copyinout.S
+++ b/sys/arm64/arm64/copyinout.S
@@ -30,6 +30,7 @@
 #include 
 #include 
 
+#include 
 #include 
 
 #include "assym.inc"
diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h
index 0967d3c0aedf..d5d4a5691f37 100644
--- a/sys/arm64/include/vmparam.h
+++ b/sys/arm64/include/vmparam.h
@@ -99,8 +99,17 @@
  * are used by UMA, the physical memory allocator reduces the likelihood of
  * both 2MB page TLB misses and cache misses during the page table walk when
  * a 2MB page TLB miss does occur.
+ *
+ * When PAGE_SIZE is 16KB, an allocation size of 32MB is supported.  This
+ * size is used by level 0 reservations and L2 BLOCK mappings.
  */
+#if PAGE_SIZE == PAGE_SIZE_4K
 #defineVM_NFREEORDER   13
+#elif PAGE_SIZE == PAGE_SIZE_16K
+#defineVM_NFREEORDER   12
+#else
+#error Unsupported page size
+#endif
 
 /*
  * Enable superpage reservations: 1 level.
@@ -110,10 +119,17 @@
 #endif
 
 /*
- * Level 0 reservations consist of 512 pages.
+ * Level 0 reservations consist of 512 pages when PAGE_SIZE is 4KB, and
+ * 2048 pages when PAGE_SIZE is 16KB.
  */
 #ifndefVM_LEVEL_0_ORDER
+#if PAGE_SIZE == PAGE_SIZE_4K
 #defineVM_LEVEL_0_ORDER9
+#elif PAGE_SIZE == PAGE_SIZE_16K
+#defineVM_LEVEL_0_ORDER11
+#else
+#error Unsupported page size
+#endif
 #endif
 
 /**
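
The arithmetic behind the new constants can be sanity-checked with a few
lines of ordinary C.  The sketch assumes a 16KB granule with 8-byte page
table entries, which is why one L2 block spans 32MB and a level 0
reservation needs order 11 to cover it; the variable names are illustrative,
not the kernel's.

#include <stdio.h>

int
main(void)
{
        unsigned long page_size = 16 * 1024;             /* 16KB granule */
        unsigned long l3_entries = page_size / 8;         /* 8-byte PTEs */
        unsigned long l2_block = l3_entries * page_size;  /* one L2 block */
        int order = 0;

        /* Smallest order such that 2^order base pages cover an L2 block. */
        while ((1UL << order) * page_size < l2_block)
                order++;
        printf("L2 block = %lu MB, VM_LEVEL_0_ORDER = %d\n",
            l2_block >> 20, order);                       /* 32 MB, 11 */
        return (0);
}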



git: fd6cb031f577 - main - arm64 pmap: Add ATTR_CONTIGUOUS support [Part 1]

2024-03-30 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=fd6cb031f577a449894e73daa8f6bd309ba27c73

commit fd6cb031f577a449894e73daa8f6bd309ba27c73
Author: Eliot Solomon 
AuthorDate: 2024-03-24 19:01:47 +
Commit: Alan Cox 
CommitDate: 2024-03-30 18:37:17 +

arm64 pmap: Add ATTR_CONTIGUOUS support [Part 1]

The ATTR_CONTIGUOUS bit within an L3 page table entry designates that
L3 page as being part of an aligned, physically contiguous collection
of L3 pages.  For example, 16 aligned, physically contiguous 4 KB pages
can form a 64 KB superpage, occupying a single TLB entry.  While this
change only creates ATTR_CONTIGUOUS mappings in a few places,
specifically, the direct map and pmap_kenter{,_device}(), it adds all
of the necessary code for handling them once they exist, including
demotion, protection, and removal.  Consequently, new ATTR_CONTIGUOUS
usage can be added (and tested) incrementally.

Modify the implementation of sysctl vm.pmap.kernel_maps so that it
correctly reports the number of ATTR_CONTIGUOUS mappings on machines
configured to use a 16 KB base page size, where an ATTR_CONTIGUOUS
mapping consists of 128 base pages.

Additionally, this change adds support for creating L2 superpage
mappings to pmap_kenter{,_device}().

Reviewed by:markj
Tested by:  gallatin
Differential Revision:  https://reviews.freebsd.org/D42737
---
 sys/arm64/arm64/pmap.c  | 767 +---
 sys/arm64/include/pte.h |  21 ++
 2 files changed, 740 insertions(+), 48 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index ba72f1dac8d0..12e5e1d73b38 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -461,18 +461,33 @@ static bool pmap_activate_int(pmap_t pmap);
 static void pmap_alloc_asid(pmap_t pmap);
 static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
 vm_prot_t prot, int mode, bool skip_unmapped);
+static bool pmap_copy_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va,
+pt_entry_t l3e, vm_page_t ml3, struct rwlock **lockp);
 static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
 static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
 vm_offset_t va, struct rwlock **lockp);
 static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
+static bool pmap_demote_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
 u_int flags, vm_page_t m, struct rwlock **lockp);
+static bool pmap_every_pte_zero(vm_paddr_t pa);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool all_l3e_AF_set);
+static pt_entry_t pmap_load_l3c(pt_entry_t *l3p);
+static void pmap_mask_set_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va,
+vm_offset_t *vap, vm_offset_t va_next, pt_entry_t mask, pt_entry_t nbits);
+static bool pmap_pv_insert_l3c(pmap_t pmap, vm_offset_t va, vm_page_t m,
+struct rwlock **lockp);
+static void pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
 static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
 pd_entry_t l1e, struct spglist *free, struct rwlock **lockp);
 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp);
+static bool pmap_remove_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va,
+vm_offset_t *vap, vm_offset_t va_next, vm_page_t ml3, struct spglist *free,
+struct rwlock **lockp);
 static void pmap_reset_asid_set(pmap_t pmap);
 static bool pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
 vm_page_t m, struct rwlock **lockp);
@@ -483,6 +498,8 @@ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t 
ptepindex,
 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
 struct spglist *free);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
+static void pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
+vm_offset_t va, vm_size_t size);
 static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
 
 static uma_zone_t pmap_bti_ranges_zone;
@@ -1121,19 +1138,20 @@ pmap_bootstrap_l2_block(struct pmap_bootstrap_state 
*state, int i)
 static void
 pmap_bootstrap_l3_page(struct pmap_bootstrap_state *state, int i)
 {
+   pt_entry_t contig;
u_int l3_slot;
bool first;
 
-   if ((physmap[i + 1] - state->pa) < L3_SIZE)
+   if (physmap[i + 1] - state->pa < L3_SIZE)
return;
 
/* Make sure there is a valid L2 table */
pmap_bootstrap_l2_table(state);
 
MPASS((state->va &a
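
As a rough user-space model of what makes a run of L3 entries eligible for
the contiguous hint, the sketch below checks the three conditions named in
the commit message: an aligned run, physically contiguous frames, and
identical attributes.  The PTE layout assumed here (frame in the high bits,
attributes in the low 12 bits) is invented for the example and is not the
arm64 descriptor format.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define NPTES          16              /* 16 x 4KB = one 64KB run */
#define PAGE_SIZE      4096UL

static bool
run_is_contiguous(const uint64_t *pte, size_t first)
{
        uint64_t expect;
        size_t i;

        if ((first % NPTES) != 0)       /* must start an aligned run */
                return (false);
        expect = pte[first];
        for (i = 0; i < NPTES; i++) {
                /* Same attributes, and each frame follows the previous. */
                if (pte[first + i] != expect)
                        return (false);
                expect += PAGE_SIZE;
        }
        return (true);
}

int
main(void)
{
        uint64_t pte[NPTES];

        for (size_t i = 0; i < NPTES; i++)
                pte[i] = 0x40000000UL + i * PAGE_SIZE;  /* contiguous run */
        return (run_is_contiguous(pte, 0) ? 0 : 1);
}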

git: e0388a906ca7 - main - arm64: enable superpage mappings by pmap_mapdev{,_attr}()

2024-03-30 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e0388a906ca77d07c99e8762d47dccaaaefd8bab

commit e0388a906ca77d07c99e8762d47dccaaaefd8bab
Author: Alan Cox 
AuthorDate: 2024-03-30 20:35:32 +
Commit: Alan Cox 
CommitDate: 2024-03-30 20:41:30 +

arm64: enable superpage mappings by pmap_mapdev{,_attr}()

In order for pmap_kenter{,_device}() to create superpage mappings,
either 64 KB or 2 MB, pmap_mapdev{,_attr}() must request appropriately
aligned virtual addresses.

Reviewed by:markj
Tested by:  gallatin
Differential Revision:  https://reviews.freebsd.org/D42737
---
 sys/kern/subr_devmap.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/sys/kern/subr_devmap.c b/sys/kern/subr_devmap.c
index 5976f16c7577..441ffeb1270a 100644
--- a/sys/kern/subr_devmap.c
+++ b/sys/kern/subr_devmap.c
@@ -273,6 +273,13 @@ pmap_mapdev(vm_paddr_t pa, vm_size_t size)
KASSERT(va >= VM_MAX_KERNEL_ADDRESS - PMAP_MAPDEV_EARLY_SIZE,
("Too many early devmap mappings"));
} else
+#endif
+#ifdef __aarch64__
+   if (size >= L2_SIZE && (pa & L2_OFFSET) == 0)
+   va = kva_alloc_aligned(size, L2_SIZE);
+   else if (size >= L3C_SIZE && (pa & L3C_OFFSET) == 0)
+   va = kva_alloc_aligned(size, L3C_SIZE);
+   else
 #endif
va = kva_alloc(size);
if (!va)
@@ -304,6 +311,13 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
KASSERT(va >= (VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE)),
("Too many early devmap mappings 2"));
} else
+#ifdef __aarch64__
+   if (size >= L2_SIZE && (pa & L2_OFFSET) == 0)
+   va = kva_alloc_aligned(size, L2_SIZE);
+   else if (size >= L3C_SIZE && (pa & L3C_OFFSET) == 0)
+   va = kva_alloc_aligned(size, L3C_SIZE);
+   else
+#endif
va = kva_alloc(size);
if (!va)
panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
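
Read in isolation, the selection logic above simply asks for the largest
alignment that both the mapping size and the physical address permit.  A
stand-alone sketch, with illustrative sizes (2MB L2 blocks and 64KB
contiguous runs) and an invented helper name:

#include <stdio.h>

#define L2_SIZE        (2UL << 20)     /* 2MB */
#define L2_OFFSET      (L2_SIZE - 1)
#define L3C_SIZE       (64UL << 10)    /* 64KB */
#define L3C_OFFSET     (L3C_SIZE - 1)

static unsigned long
pick_alignment(unsigned long pa, unsigned long size)
{
        if (size >= L2_SIZE && (pa & L2_OFFSET) == 0)
                return (L2_SIZE);
        if (size >= L3C_SIZE && (pa & L3C_OFFSET) == 0)
                return (L3C_SIZE);
        return (0);                     /* no special alignment needed */
}

int
main(void)
{
        printf("%#lx\n", pick_alignment(0x40000000UL, 4UL << 20)); /* 2MB */
        printf("%#lx\n", pick_alignment(0x40010000UL, 4UL << 20)); /* 64KB */
        printf("%#lx\n", pick_alignment(0x40001000UL, 4UL << 20)); /* none */
        return (0);
}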



git: 22c098843127 - main - arm64: correctly handle a failed BTI check in pmap_enter_l2()

2024-04-03 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=22c098843127f6a31e25e94b07b35677f038f6d6

commit 22c098843127f6a31e25e94b07b35677f038f6d6
Author: Alan Cox 
AuthorDate: 2024-04-03 05:21:08 +
Commit: Alan Cox 
CommitDate: 2024-04-03 16:19:30 +

arm64: correctly handle a failed BTI check in pmap_enter_l2()

If pmap_enter_l2() does not create a mapping because the BTI check
fails, then we should release the reference on the page table page
acquired from pmap_alloc_l2().  Otherwise, the page table page will
never be reclaimed.
---
 sys/arm64/arm64/pmap.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 12e5e1d73b38..258aa141653b 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -5269,8 +5269,11 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
new_l2, u_int flags,
 * and let vm_fault() cope.  Check after l2 allocation, since
 * it could sleep.
 */
-   if (!pmap_bti_same(pmap, va, va + L2_SIZE))
+   if (!pmap_bti_same(pmap, va, va + L2_SIZE)) {
+   KASSERT(l2pg != NULL, ("pmap_enter_l2: missing L2 PTP"));
+   pmap_abort_ptp(pmap, va, l2pg);
return (KERN_PROTECTION_FAILURE);
+   }
 
/*
 * If there are existing mappings, either abort or remove them.
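
The fix follows a general error-path rule: a reference taken before a
precondition check must be dropped when the check fails, or the object can
never be reclaimed.  A minimal stand-alone sketch with invented names rather
than the pmap API:

#include <stdlib.h>

struct ptp { int ref_count; };
static struct ptp *installed;           /* stands in for the page tables */

static void
ptp_release(struct ptp *p)
{
        if (--p->ref_count == 0)
                free(p);                /* reclaimed rather than leaked */
}

static int
enter_mapping(int bti_ok)
{
        struct ptp *p = calloc(1, sizeof(*p));

        if (p == NULL)
                return (-1);
        p->ref_count = 1;               /* reference taken before the check */
        if (!bti_ok) {
                ptp_release(p);         /* the release the commit adds */
                return (-1);
        }
        installed = p;                  /* success: the mapping keeps the ref */
        return (0);
}

int
main(void)
{
        return (enter_mapping(0) == -1 && enter_mapping(1) == 0 ? 0 : 1);
}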



git: 7beeacb27b27 - main - Honor the vm page's PG_NODUMP flag on arm and i386.

2021-01-04 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7beeacb27b2792dfdab9c806c00d50b6ac9fc34b

commit 7beeacb27b2792dfdab9c806c00d50b6ac9fc34b
Author: Alan Cox 
AuthorDate: 2021-01-04 21:59:05 +
Commit: Alan Cox 
CommitDate: 2021-01-04 22:15:42 +

Honor the vm page's PG_NODUMP flag on arm and i386.

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D27949
---
 sys/arm/arm/minidump_machdep.c| 3 +++
 sys/i386/i386/minidump_machdep_base.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/sys/arm/arm/minidump_machdep.c b/sys/arm/arm/minidump_machdep.c
index 92e15bec860b..c5f9cb58302e 100644
--- a/sys/arm/arm/minidump_machdep.c
+++ b/sys/arm/arm/minidump_machdep.c
@@ -68,8 +68,11 @@ static uint64_t counter, progress;
 static int
 is_dumpable(vm_paddr_t pa)
 {
+   vm_page_t m;
int i;
 
+   if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL)
+   return ((m->flags & PG_NODUMP) == 0);
for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
return (1);
diff --git a/sys/i386/i386/minidump_machdep_base.c 
b/sys/i386/i386/minidump_machdep_base.c
index 9e803c6fd813..e2b4234eba11 100644
--- a/sys/i386/i386/minidump_machdep_base.c
+++ b/sys/i386/i386/minidump_machdep_base.c
@@ -65,8 +65,11 @@ static uint64_t counter, progress;
 static int
 is_dumpable(vm_paddr_t pa)
 {
+   vm_page_t m;
int i;
 
+   if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL)
+   return ((m->flags & PG_NODUMP) == 0);
for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
return (1);


git: 5a181b8bce99 - main - Prefer the use of vm_page_domain() to vm_phys_domain().

2021-01-10 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5a181b8bce9958be9e3c2b3840f5a56b712c108e

commit 5a181b8bce9958be9e3c2b3840f5a56b712c108e
Author: Alan Cox 
AuthorDate: 2021-01-10 08:51:33 +
Commit: Alan Cox 
CommitDate: 2021-01-10 19:25:33 +

Prefer the use of vm_page_domain() to vm_phys_domain().

When we already have the vm page in hand, use vm_page_domain() instead
of vm_phys_domain().  The former has a trivial constant-time
implementation whereas the latter iterates over the mem_affinity array.

Reviewed by:kib, markj
Differential Revision:  https://reviews.freebsd.org/D28005
---
 sys/amd64/amd64/pmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 5267203d7473..2ec303d687a7 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -5259,7 +5259,7 @@ retry:
pc->pc_map[0] = PC_FREE0 & ~1ul;/* preallocated bit 0 */
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
-   pvc = &pv_chunks[vm_phys_domain(m->phys_addr)];
+   pvc = &pv_chunks[vm_page_domain(m)];
mtx_lock(&pvc->pvc_lock);
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
mtx_unlock(&pvc->pvc_lock);
@@ -5360,7 +5360,7 @@ retry:
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
-   TAILQ_INSERT_TAIL(&new_tail[pc_to_domain(pc)], pc, pc_lru);
+   TAILQ_INSERT_TAIL(&new_tail[vm_page_domain(m)], pc, pc_lru);
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
 
/*
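
The difference between the two lookups is easiest to see side by side.  The
structures below are invented for illustration: the range walk stands in for
vm_phys_domain(), and the cached per-page field stands in for
vm_page_domain().

#include <stddef.h>

struct mem_range { unsigned long start, end; int domain; };
struct page { unsigned long phys_addr; int domain; };

static const struct mem_range ranges[] = {
        { 0x00000000UL, 0x40000000UL, 0 },
        { 0x40000000UL, 0x80000000UL, 1 },
};

/* Analogue of vm_phys_domain(): linear search of the affinity ranges. */
static int
domain_from_pa(unsigned long pa)
{
        for (size_t i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++)
                if (pa >= ranges[i].start && pa < ranges[i].end)
                        return (ranges[i].domain);
        return (-1);
}

/* Analogue of vm_page_domain(): the answer is already stored in the page. */
static int
domain_from_page(const struct page *m)
{
        return (m->domain);
}

int
main(void)
{
        struct page m = { 0x60000000UL, 1 };

        return (domain_from_pa(m.phys_addr) == domain_from_page(&m) ? 0 : 1);
}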


git: 6f6a166eaf5e - main - arm64: Use page_to_pvh() when the vm_page_t is known

2021-06-21 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=6f6a166eaf5e59dedb761ea6152417433a841e3b

commit 6f6a166eaf5e59dedb761ea6152417433a841e3b
Author: Alan Cox 
AuthorDate: 2021-06-21 07:45:21 +
Commit: Alan Cox 
CommitDate: 2021-06-21 22:25:06 +

arm64: Use page_to_pvh() when the vm_page_t is known

When support for a sparse pv_table was added, the implementation of
pa_to_pvh() changed from a simple constant-time calculation to iterating
over the array vm_phys_segs[].  To mitigate this issue, an alternative
function, page_to_pvh(), was introduced that still runs in constant time
but requires the vm_page_t to be known.  However, three cases where the
vm_page_t is known were not converted to page_to_pvh().  This change
converts those three cases.

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D30832
---
 sys/arm64/arm64/pmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index ffc83be852bd..5f321be98528 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -3474,7 +3474,7 @@ pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, 
vm_paddr_t pa,
va = va & ~L2_OFFSET;
pv = pmap_pvh_remove(&m->md, pmap, va);
KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found"));
-   pvh = pa_to_pvh(pa);
+   pvh = page_to_pvh(m);
TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
pvh->pv_gen++;
/* Free the remaining NPTEPG - 1 pv entries. */
@@ -3896,7 +3896,7 @@ havel3:
if ((om->a.flags & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&om->md.pv_list) &&
((om->flags & PG_FICTITIOUS) != 0 ||
-   TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
+   TAILQ_EMPTY(&page_to_pvh(om)->pv_list)))
vm_page_aflag_clear(om, PGA_WRITEABLE);
} else {
KASSERT((orig_l3 & ATTR_AF) != 0,
@@ -5000,7 +5000,7 @@ pmap_remove_pages(pmap_t pmap)
case 1:
pmap_resident_count_dec(pmap,
L2_SIZE / PAGE_SIZE);
-   pvh = pa_to_pvh(tpte & ~ATTR_MASK);
+   pvh = page_to_pvh(m);
TAILQ_REMOVE(&pvh->pv_list, pv,pv_next);
pvh->pv_gen++;
if (TAILQ_EMPTY(&pvh->pv_list)) {


git: 62ea198e95f1 - main - arm64: remove an unneeded test from pmap_clear_modify()

2021-06-23 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=62ea198e95f139e6b8041ec44f75d65aa26970d0

commit 62ea198e95f139e6b8041ec44f75d65aa26970d0
Author: Alan Cox 
AuthorDate: 2021-06-23 05:10:20 +
Commit: Alan Cox 
CommitDate: 2021-06-23 19:22:46 +

arm64: remove an unneeded test from pmap_clear_modify()

The page table entry for a 4KB page mapping must be valid if a PV entry
for the mapping exists, so there is no point in testing each page table
entry's validity when iterating over a PV list.

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D30875
---
 sys/arm64/arm64/pmap.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 5f321be98528..7def96bca70b 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -5664,8 +5664,7 @@ restart:
l2 = pmap_l2(pmap, pv->pv_va);
l3 = pmap_l2_to_l3(l2, pv->pv_va);
oldl3 = pmap_load(l3);
-   if (pmap_l3_valid(oldl3) &&
-   (oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == ATTR_SW_DBM){
+   if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == ATTR_SW_DBM){
pmap_set_bits(l3, ATTR_S1_AP(ATTR_S1_AP_RO));
pmap_invalidate_page(pmap, pv->pv_va);
}


git: 4c57d6d55516 - main - amd64/pmap: fix user page table page accounting

2021-12-05 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4c57d6d5551629df348e2087d2382ae7cbf8b312

commit 4c57d6d5551629df348e2087d2382ae7cbf8b312
Author: Alan Cox 
AuthorDate: 2021-12-05 23:40:53 +
Commit: Alan Cox 
CommitDate: 2021-12-06 01:13:43 +

amd64/pmap: fix user page table page accounting

When a superpage mapping is destroyed and the original page table page
containing 4KB mappings that was being held in reserve is deallocated,
the recently introduced user page table page count was not being
decremented.  Consequently, the count was wrong and would grow over
time.  For example, after multiple iterations of "buildworld", I was
seeing implausible counts, like the following:

vm.pmap.kernel_pt_page_count: 2184
vm.pmap.user_pt_page_count: 2280849
vm.pmap.pv_page_count: 106

With this change, I now see:

vm.pmap.kernel_pt_page_count: 2183
vm.pmap.user_pt_page_count: 344
vm.pmap.pv_page_count: 105

Reviewed by:kib, markj
Differential Revision:  https://reviews.freebsd.org/D33276
---
 sys/amd64/amd64/pmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index e9973a420de3..153664698e43 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6140,7 +6140,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t 
sva,
if (mpte != NULL) {
KASSERT(mpte->valid == VM_PAGE_BITS_ALL,
("pmap_remove_pde: pte page not promoted"));
-   pmap_resident_count_adj(pmap, -1);
+   pmap_pt_page_count_adj(pmap, -1);
KASSERT(mpte->ref_count == NPTEPG,
("pmap_remove_pde: pte page ref count error"));
mpte->ref_count = 0;
@@ -8408,7 +8408,7 @@ pmap_remove_pages(pmap_t pmap)
if (mpte != NULL) {
KASSERT(mpte->valid == VM_PAGE_BITS_ALL,
("pmap_remove_pages: pte page not promoted"));
-   pmap_resident_count_adj(pmap, -1);
+   pmap_pt_page_count_adj(pmap, -1);
KASSERT(mpte->ref_count == NPTEPG,
("pmap_remove_pages: pte page reference count error"));
mpte->ref_count = 0;



git: b7ec0d268b73 - main - arm64: Introduce and use pmap_pte_exists()

2021-12-23 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=b7ec0d268b73ce20c4f785d21cde9b174c91a553

commit b7ec0d268b73ce20c4f785d21cde9b174c91a553
Author: Alan Cox 
AuthorDate: 2021-12-23 18:50:14 +
Commit: Alan Cox 
CommitDate: 2021-12-24 04:56:02 +

arm64: Introduce and use pmap_pte_exists()

Use pmap_pte_exists() instead of pmap_pte() when the caller expects a
mapping to exist at a particular level.  The caller benefits in two
ways from using pmap_pte_exists().  First, because the level is
specified to pmap_pte_exists() as a constant, rather than returned, the
compiler can specialize the implementation of pmap_pte_exists() to the
caller's exact needs, i.e., generate fewer instructions.  Consequently,
within a GENERIC-NODEBUG kernel, 704 bytes worth of instructions are
eliminated from the inner loops of various pmap functions.  Second,
suppose that the mapping doesn't exist.  Rather than requiring every
caller to implement its own KASSERT()s to report missing mappings, the
caller can optionally have pmap_pte_exists() provide the KASSERT().

Reviewed by:andrew, kib
Tested by:  andrew (an earlier version)
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D33597
---
 sys/arm64/arm64/pmap.c | 90 ++
 1 file changed, 55 insertions(+), 35 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index e69d0f9c1d81..0d1e604f22ef 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -596,6 +596,46 @@ pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
return (l3);
 }
 
+/*
+ * If the given pmap has an L{1,2}_BLOCK or L3_PAGE entry at the specified
+ * level that maps the specified virtual address, then a pointer to that entry
+ * is returned.  Otherwise, NULL is returned, unless INVARIANTS are enabled
+ * and a diagnostic message is provided, in which case this function panics.
+ */
+static __always_inline pt_entry_t *
+pmap_pte_exists(pmap_t pmap, vm_offset_t va, int level, const char *diag)
+{
+   pd_entry_t *l0p, *l1p, *l2p;
+   pt_entry_t desc, *l3p;
+
+   KASSERT(level >= 0 && level < 4,
+   ("%s: %s passed an out-of-range level (%d)", __func__, diag,
+   level));
+   l0p = pmap_l0(pmap, va);
+   desc = pmap_load(l0p) & ATTR_DESCR_MASK;
+   if (desc == L0_TABLE && level > 0) {
+   l1p = pmap_l0_to_l1(l0p, va);
+   desc = pmap_load(l1p) & ATTR_DESCR_MASK;
+   if (desc == L1_BLOCK && level == 1)
+   return (l1p);
+   else if (desc == L1_TABLE && level > 1) {
+   l2p = pmap_l1_to_l2(l1p, va);
+   desc = pmap_load(l2p) & ATTR_DESCR_MASK;
+   if (desc == L2_BLOCK && level == 2)
+   return (l2p);
+   else if (desc == L2_TABLE && level > 2) {
+   l3p = pmap_l2_to_l3(l2p, va);
+   desc = pmap_load(l3p) & ATTR_DESCR_MASK;
+   if (desc == L3_PAGE && level == 3)
+   return (l3p);
+   }
+   }
+   }
+   KASSERT(diag == NULL,
+   ("%s: va %#lx is not mapped at level %d", diag, va, level));
+   return (NULL);
+}
+
 bool
 pmap_ps_enabled(pmap_t pmap __unused)
 {
@@ -1483,12 +1523,8 @@ PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
pt_entry_t *pte;
-   int lvl;
-
-   pte = pmap_pte(kernel_pmap, va, &lvl);
-   KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
-   KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));
 
+   pte = pmap_pte_exists(kernel_pmap, va, 3, __func__);
pmap_clear(pte);
pmap_invalidate_page(kernel_pmap, va);
 }
@@ -1498,7 +1534,6 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 {
pt_entry_t *pte;
vm_offset_t va;
-   int lvl;
 
KASSERT((sva & L3_OFFSET) == 0,
   ("pmap_kremove_device: Invalid virtual address"));
@@ -1507,10 +1542,7 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 
va = sva;
while (size != 0) {
-   pte = pmap_pte(kernel_pmap, va, &lvl);
-   KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
-   KASSERT(lvl == 3,
-   ("Invalid device pagetable level: %d != 3", lvl));
+   pte = pmap_pte_exists(kernel_pmap, va, 3, __func__);
pmap_clear(pte);
 
va += PAGE_SIZE;
@@ -1584,7 +1616,6 @@ pmap_qremove(vm_offset_t sva, int count)
 {
pt_entry_t *pte;
vm
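
The specialization argument can be seen in miniature with a toy two-level
walk: because the level argument is a literal at every call site and the
helper is forced inline, the compiler discards the branches and loads that a
given caller does not need.  The types and layout below are invented, not
the arm64 ones.

#include <stddef.h>
#include <stdint.h>

#define VALID  1UL

struct toy_pmap {
        uint64_t dir[8];                /* one entry per 8 leaf slots */
        uint64_t leaf[64];
};

static inline __attribute__((always_inline)) uint64_t *
pte_exists(struct toy_pmap *pm, size_t idx, int level)
{
        uint64_t *dir = &pm->dir[idx / 8];

        if ((*dir & VALID) == 0)
                return (NULL);
        if (level == 1)                 /* caller wants the directory entry */
                return (dir);
        if ((pm->leaf[idx] & VALID) == 0)
                return (NULL);
        return (&pm->leaf[idx]);        /* level == 2 */
}

int
main(void)
{
        struct toy_pmap pm = { .dir = { VALID }, .leaf = { VALID } };

        /* Each call site passes a literal level, so the walk specializes. */
        return (pte_exists(&pm, 0, 2) != NULL &&
            pte_exists(&pm, 9, 1) == NULL ? 0 : 1);
}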

git: 03f9cc89e1f5 - main - arm64: Fix "set-but-not-used" warnings in the pmap

2021-12-27 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=03f9cc89e1f5ddf5f54785cb10f551ab94d139ac

commit 03f9cc89e1f5ddf5f54785cb10f551ab94d139ac
Author: Alan Cox 
AuthorDate: 2021-12-27 17:37:04 +
Commit: Alan Cox 
CommitDate: 2021-12-27 17:48:15 +

arm64: Fix "set-but-not-used" warnings in the pmap

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 0d1e604f22ef..6d12f66807c3 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -2113,7 +2113,7 @@ retry:
 void
 pmap_release(pmap_t pmap)
 {
-   boolean_t rv;
+   boolean_t rv __diagused;
struct spglist free;
struct asid_set *set;
vm_page_t m;
@@ -2839,7 +2839,7 @@ pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
l2e, u_int flags,
 static void
 pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
 {
-   pt_entry_t newl2, oldl2;
+   pt_entry_t newl2, oldl2 __diagused;
vm_page_t ml3;
vm_paddr_t ml3pa;
 
@@ -5376,7 +5376,7 @@ pmap_ts_referenced(vm_page_t m)
pv_entry_t pv, pvf;
pmap_t pmap;
struct rwlock *lock;
-   pd_entry_t *pde, tpde;
+   pd_entry_t *pde, tpde __diagused;
pt_entry_t *pte, tpte;
vm_offset_t va;
vm_paddr_t pa;
@@ -6918,7 +6918,7 @@ pmap_map_io_transient(vm_page_t page[], vm_offset_t 
vaddr[], int count,
 {
vm_paddr_t paddr;
boolean_t needs_mapping;
-   int error, i;
+   int error __diagused, i;
 
/*
 * Allocate any KVA space that we need, this is done in a separate



git: e161dfa91897 - main - Fix pmap_is_prefaultable() on arm64 and riscv

2021-12-27 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e161dfa918974b4392c7c5127bd51f28ea5f8b6a

commit e161dfa918974b4392c7c5127bd51f28ea5f8b6a
Author: Alan Cox 
AuthorDate: 2021-12-25 03:54:01 +
Commit: Alan Cox 
CommitDate: 2021-12-28 01:17:14 +

Fix pmap_is_prefaultable() on arm64 and riscv

The current implementations never correctly return TRUE. In all cases,
when they currently return TRUE, they should have returned FALSE.  And,
in some cases, when they currently return FALSE, they should have
returned TRUE.  Aside from its effect on performance, namely additional
page faults and pointless calls to pmap_enter_quick() that abort, this
error is harmless.  That is why it has gone unnoticed.

Add a comment to the amd64, arm64, and riscv implementations
describing how their return values are computed.

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D33659
---
 sys/amd64/amd64/pmap.c |  5 +
 sys/arm64/arm64/pmap.c | 12 +---
 sys/riscv/riscv/pmap.c |  6 +-
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index f6efce1303d4..42ad1bd24136 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -8567,6 +8567,11 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
boolean_t rv;
 
PG_V = pmap_valid_bit(pmap);
+
+   /*
+* Return TRUE if and only if the PTE for the specified virtual
+* address is allocated but invalid.
+*/
rv = FALSE;
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, addr);
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 6d12f66807c3..4bd3eef7a18f 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -5246,15 +5246,21 @@ pmap_is_modified(vm_page_t m)
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
+   pd_entry_t *pde;
pt_entry_t *pte;
boolean_t rv;
int lvl;
 
+   /*
+* Return TRUE if and only if the L3 entry for the specified virtual
+* address is allocated but invalid.
+*/
rv = FALSE;
PMAP_LOCK(pmap);
-   pte = pmap_pte(pmap, addr, &lvl);
-   if (pte != NULL && pmap_load(pte) != 0) {
-   rv = TRUE;
+   pde = pmap_pde(pmap, addr, &lvl);
+   if (pde != NULL && lvl == 2) {
+   pte = pmap_l2_to_l3(pde, addr);
+   rv = pmap_load(pte) == 0;
}
PMAP_UNLOCK(pmap);
return (rv);
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 9abf75a731f5..1dc62418b165 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -3850,10 +3850,14 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
pt_entry_t *l3;
boolean_t rv;
 
+   /*
+* Return TRUE if and only if the L3 entry for the specified virtual
+* address is allocated but invalid.
+*/
rv = FALSE;
PMAP_LOCK(pmap);
l3 = pmap_l3(pmap, addr);
-   if (l3 != NULL && pmap_load(l3) != 0) {
+   if (l3 != NULL && pmap_load(l3) == 0) {
rv = TRUE;
}
PMAP_UNLOCK(pmap);
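
Restated in isolation, the corrected predicate says: prefaulting makes sense
only when the leaf page table already exists (so pmap_enter_quick() will not
have to allocate one) and the entry itself is still invalid (nothing is
mapped there yet).  The helpers below are invented stand-ins for the L3
lookup; the second shows the old, inverted test for contrast.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool
is_prefaultable(const uint64_t *l3e)    /* NULL if no L3 table exists */
{
        return (l3e != NULL && *l3e == 0);
}

static bool
is_prefaultable_old(const uint64_t *l3e)
{
        return (l3e != NULL && *l3e != 0);      /* wrong: already mapped */
}

int
main(void)
{
        uint64_t empty = 0, mapped = 1;

        return (is_prefaultable(&empty) && !is_prefaultable(&mapped) &&
            !is_prefaultable(NULL) && !is_prefaultable_old(&empty) ? 0 : 1);
}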



git: 3c2ee7b28cfd - main - arm64: Enhance pmap_pte_exists()'s error reporting

2021-12-28 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3c2ee7b28cfd715e28e72d76efd89ba3c38aa970

commit 3c2ee7b28cfd715e28e72d76efd89ba3c38aa970
Author: Alan Cox 
AuthorDate: 2021-12-28 23:17:42 +
Commit: Alan Cox 
CommitDate: 2021-12-28 23:46:21 +

arm64: Enhance pmap_pte_exists()'s error reporting

Report the descriptor type and level at which the page table does not
match the caller's expectations.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 4bd3eef7a18f..15bb813f591d 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -607,6 +607,7 @@ pmap_pte_exists(pmap_t pmap, vm_offset_t va, int level, 
const char *diag)
 {
pd_entry_t *l0p, *l1p, *l2p;
pt_entry_t desc, *l3p;
+   int walk_level __diagused;
 
KASSERT(level >= 0 && level < 4,
("%s: %s passed an out-of-range level (%d)", __func__, diag,
@@ -628,11 +629,17 @@ pmap_pte_exists(pmap_t pmap, vm_offset_t va, int level, 
const char *diag)
desc = pmap_load(l3p) & ATTR_DESCR_MASK;
if (desc == L3_PAGE && level == 3)
return (l3p);
-   }
-   }
-   }
+   else
+   walk_level = 3;
+   } else
+   walk_level = 2;
+   } else
+   walk_level = 1;
+   } else
+   walk_level = 0;
KASSERT(diag == NULL,
-   ("%s: va %#lx is not mapped at level %d", diag, va, level));
+   ("%s: va %#lx not mapped at level %d, desc %ld at level %d",
+   diag, va, level, desc, walk_level));
return (NULL);
 }
 



git: 24b82aa0c543 - main - arm64: Simplify pmap_ts_referenced

2021-12-28 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=24b82aa0c543cc6d63bfbde651b2325ae360dc50

commit 24b82aa0c543cc6d63bfbde651b2325ae360dc50
Author: Alan Cox 
AuthorDate: 2021-12-28 00:27:52 +
Commit: Alan Cox 
CommitDate: 2021-12-28 23:59:39 +

arm64: Simplify pmap_ts_referenced

Use pmap_pte_exists() in place of multiple KASSERT()s.

Eliminate an unnecessary NULL check.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 26 +-
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 15bb813f591d..68164708dce9 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -5389,11 +5389,10 @@ pmap_ts_referenced(vm_page_t m)
pv_entry_t pv, pvf;
pmap_t pmap;
struct rwlock *lock;
-   pd_entry_t *pde, tpde __diagused;
pt_entry_t *pte, tpte;
vm_offset_t va;
vm_paddr_t pa;
-   int cleared, lvl, md_gen, not_cleared, pvh_gen;
+   int cleared, md_gen, not_cleared, pvh_gen;
struct spglist free;
 
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
@@ -5424,14 +5423,7 @@ retry:
}
}
va = pv->pv_va;
-   pde = pmap_pde(pmap, va, &lvl);
-   KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found"));
-   KASSERT(lvl == 1,
-   ("pmap_ts_referenced: invalid pde level %d", lvl));
-   tpde = pmap_load(pde);
-   KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE,
-   ("pmap_ts_referenced: found an invalid l1 table"));
-   pte = pmap_l1_to_l2(pde, va);
+   pte = pmap_pte_exists(pmap, va, 2, __func__);
tpte = pmap_load(pte);
if (pmap_pte_dirty(pmap, tpte)) {
/*
@@ -5441,7 +5433,6 @@ retry:
 */
vm_page_dirty(m);
}
-
if ((tpte & ATTR_AF) != 0) {
/*
 * Since this reference bit is shared by 512 4KB pages,
@@ -5472,7 +5463,7 @@ retry:
}
PMAP_UNLOCK(pmap);
/* Rotate the PV list if it has more than one entry. */
-   if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
+   if (TAILQ_NEXT(pv, pv_next) != NULL) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
pvh->pv_gen++;
@@ -5499,14 +5490,7 @@ small_mappings:
goto retry;
}
}
-   pde = pmap_pde(pmap, pv->pv_va, &lvl);
-   KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found"));
-   KASSERT(lvl == 2,
-   ("pmap_ts_referenced: invalid pde level %d", lvl));
-   tpde = pmap_load(pde);
-   KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
-   ("pmap_ts_referenced: found an invalid l2 table"));
-   pte = pmap_l2_to_l3(pde, pv->pv_va);
+   pte = pmap_pte_exists(pmap, pv->pv_va, 3, __func__);
tpte = pmap_load(pte);
if (pmap_pte_dirty(pmap, tpte))
vm_page_dirty(m);
@@ -5520,7 +5504,7 @@ small_mappings:
}
PMAP_UNLOCK(pmap);
/* Rotate the PV list if it has more than one entry. */
-   if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
+   if (TAILQ_NEXT(pv, pv_next) != NULL) {
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;



git: 5d1ee799de65 - main - arm64 pmap: Eliminate an unused global variable

2023-05-27 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5d1ee799de65ca62cd94c1602b41255bdbc3312d

commit 5d1ee799de65ca62cd94c1602b41255bdbc3312d
Author: Alan Cox 
AuthorDate: 2023-05-27 06:23:48 +
Commit: Alan Cox 
CommitDate: 2023-05-27 06:38:20 +

arm64 pmap: Eliminate an unused global variable

The global variable "pmap_last_pa" was copied from the amd64 pmap as a
part of commit c15085278cb5 "arm64 pmap: implement per-superpage locks"
but it is neither used nor needed by the arm64 pmap.
---
 sys/arm64/arm64/pmap.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 16e671295ca6..6bc9adba71e0 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -341,7 +341,6 @@ struct pv_chunks_list __exclusive_cache_line 
pv_chunks[PMAP_MEMDOM];
 __exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
 #define pv_dummy pv_dummy_large.pv_page
 __read_mostly static struct pmap_large_md_page *pv_table;
-__read_mostly vm_paddr_t pmap_last_pa;
 
 vm_paddr_t dmap_phys_base; /* The start of the dmap region */
 vm_paddr_t dmap_phys_max;  /* The limit of the dmap region */



git: 3e7e2bb2467e - main - arm64 pmap: Make VM_PAGE_TO_PV_LIST_LOCK() a constant-time operation

2023-05-29 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3e7e2bb2467e8bb682176125397168c88c3913c6

commit 3e7e2bb2467e8bb682176125397168c88c3913c6
Author: Alan Cox 
AuthorDate: 2023-05-29 06:01:37 +
Commit: Alan Cox 
CommitDate: 2023-05-29 16:22:55 +

arm64 pmap: Make VM_PAGE_TO_PV_LIST_LOCK() a constant-time operation

The prior implementation of VM_PAGE_TO_PV_LIST_LOCK() performed a
linear-time search of the vm_phys_segs[] array.  However, in contrast to
PHYS_TO_PV_LIST_LOCK(), that search is unnecessary because every (non-
fictitious) vm_page contains the index of the vm_phys_seg in which it
resides.

Change most of the remaining uses of CHANGE_PV_LIST_LOCK_TO_PHYS() and
PHYS_TO_PV_LIST_LOCK() to CHANGE_PV_LIST_LOCK_TO_VM_PAGE() and
VM_PAGE_TO_PV_LIST_LOCK(), respectively.

Collectively, these changes also reduce the size of a GENERIC-NODEBUG
kernel's pmap.

Before:

  text   data    bss     dec       hex   filename
 70144   3200   2248   75592   0x12748   pmap.o

After:

  text   data    bss     dec       hex   filename
 69192   3200   2248   74640   0x12390   pmap.o

Reviewed by:kib, markj
Differential Revision:  https://reviews.freebsd.org/D40306
---
 sys/arm64/arm64/pmap.c | 43 ++-
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 6bc9adba71e0..150532b68c75 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -202,6 +202,10 @@ struct pmap_large_md_page {
int pv_pad[2];
 };
 
+__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
+#define pv_dummy pv_dummy_large.pv_page
+__read_mostly static struct pmap_large_md_page *pv_table;
+
 static struct pmap_large_md_page *
 _pa_to_pmdp(vm_paddr_t pa)
 {
@@ -252,11 +256,19 @@ page_to_pmdp(vm_page_t m)
_lock;  \
 })
 
-#defineCHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)  do {\
+static struct rwlock *
+VM_PAGE_TO_PV_LIST_LOCK(vm_page_t m)
+{
+   if ((m->flags & PG_FICTITIOUS) == 0)
+   return (&page_to_pmdp(m)->pv_lock);
+   else
+   return (&pv_dummy_large.pv_lock);
+}
+
+#defineCHANGE_PV_LIST_LOCK(lockp, new_lock)do {\
struct rwlock **_lockp = (lockp);   \
-   struct rwlock *_new_lock;   \
+   struct rwlock *_new_lock = (new_lock);  \
\
-   _new_lock = PHYS_TO_PV_LIST_LOCK(pa);   \
if (_new_lock != *_lockp) { \
if (*_lockp != NULL)\
rw_wunlock(*_lockp);\
@@ -265,8 +277,11 @@ page_to_pmdp(vm_page_t m)
}   \
 } while (0)
 
+#defineCHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)  \
+   CHANGE_PV_LIST_LOCK(lockp, PHYS_TO_PV_LIST_LOCK(pa))
+
 #defineCHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)\
-   CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+   CHANGE_PV_LIST_LOCK(lockp, VM_PAGE_TO_PV_LIST_LOCK(m))
 
 #defineRELEASE_PV_LIST_LOCK(lockp) do {\
struct rwlock **_lockp = (lockp);   \
@@ -277,9 +292,6 @@ page_to_pmdp(vm_page_t m)
}   \
 } while (0)
 
-#defineVM_PAGE_TO_PV_LIST_LOCK(m)  \
-   PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
-
 /*
  * The presence of this flag indicates that the mapping is writeable.
  * If the ATTR_S1_AP_RO bit is also set, then the mapping is clean, otherwise
@@ -338,10 +350,6 @@ struct pv_chunks_list {
 
 struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
 
-__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
-#define pv_dummy pv_dummy_large.pv_page
-__read_mostly static struct pmap_large_md_page *pv_table;
-
 vm_paddr_t dmap_phys_base; /* The start of the dmap region */
 vm_paddr_t dmap_phys_max;  /* The limit of the dmap region */
 vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */
@@ -3427,7 +3435,7 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t 
sva,
if (old_l2 & ATTR_SW_MANAGED) {
m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(old_l2));
pvh = page_to_pvh(m);
-   CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, PTE_TO_PHYS(old_l2));
+   CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
pmap_pvh_free(pvh, pmap, sva);
for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) {
if (pmap_pte_dirty(pmap, old_l2))
@@ -3533,7 +3541,7 @@ pmap_remove_l3
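
The CHANGE_PV_LIST_LOCK() idiom above is a lock hand-off: the caller carries
at most one write-locked PV list lock across a loop and switches only when
the newly computed lock differs from the one already held.  A user-space
sketch, with pthread rwlocks standing in for the kernel's rwlocks and an
invented helper name:

#include <pthread.h>
#include <stddef.h>

static void
change_pv_list_lock(pthread_rwlock_t **lockp, pthread_rwlock_t *new_lock)
{
        if (new_lock != *lockp) {
                if (*lockp != NULL)
                        pthread_rwlock_unlock(*lockp);
                *lockp = new_lock;
                pthread_rwlock_wrlock(new_lock);
        }
}

int
main(void)
{
        pthread_rwlock_t a = PTHREAD_RWLOCK_INITIALIZER;
        pthread_rwlock_t b = PTHREAD_RWLOCK_INITIALIZER;
        pthread_rwlock_t *held = NULL;

        change_pv_list_lock(&held, &a); /* takes a */
        change_pv_list_lock(&held, &a); /* no-op: already held */
        change_pv_list_lock(&held, &b); /* drops a, takes b */
        if (held != NULL)
                pthread_rwlock_unlock(held);
        return (0);
}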

git: 8d7ee2047c5e - main - pmap: don't recompute mpte during promotion

2022-09-10 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=8d7ee2047c5e8b4db51c682aee4161ebfd1238e5

commit 8d7ee2047c5e8b4db51c682aee4161ebfd1238e5
Author: Alan Cox 
AuthorDate: 2022-09-09 23:34:58 +
Commit: Alan Cox 
CommitDate: 2022-09-11 06:19:22 +

pmap: don't recompute mpte during promotion

When attempting to promote 4KB user-space mappings to a 2MB user-space
mapping, the address of the struct vm_page representing the page table
page that contains the 4KB mappings is already known to the caller.
Pass that address to the promotion function rather than making the
promotion function recompute it, which on arm64 entails iteration over
the vm_phys_segs array by PHYS_TO_VM_PAGE().  And, while I'm here,
eliminate unnecessary arithmetic from the calculation of the first PTE's
address on arm64.

MFC after:  1 week
---
 sys/amd64/amd64/pmap.c | 12 ++--
 sys/arm64/arm64/pmap.c | 14 ++
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 326103a1affb..e3f281784893 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1277,7 +1277,7 @@ static vm_page_t pmap_large_map_getptp_unlocked(void);
 static vm_paddr_t pmap_large_map_kextract(vm_offset_t va);
 #if VM_NRESERVLEVEL > 0
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
-struct rwlock **lockp);
+vm_page_t mpte, struct rwlock **lockp);
 #endif
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t 
sva,
 vm_prot_t prot);
@@ -6737,13 +6737,12 @@ pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
  * identical characteristics. 
  */
 static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte,
 struct rwlock **lockp)
 {
pd_entry_t newpde;
pt_entry_t *firstpte, oldpte, pa, *pte;
pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V, PG_PKU_MASK;
-   vm_page_t mpte;
int PG_PTE_CACHE;
 
PG_A = pmap_accessed_bit(pmap);
@@ -6823,7 +6822,8 @@ setpte:
 * mapping the superpage is demoted by pmap_demote_pde() or
 * destroyed by pmap_remove_pde(). 
 */
-   mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
+   if (mpte == NULL)
+   mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
KASSERT(mpte >= vm_page_array &&
mpte < &vm_page_array[vm_page_array_size],
("pmap_promote_pde: page table page is out of range"));
@@ -7237,7 +7237,7 @@ unchanged:
pmap_ps_enabled(pmap) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0)
-   pmap_promote_pde(pmap, pde, va, &lock);
+   pmap_promote_pde(pmap, pde, va, mpte, &lock);
 #endif
 
rv = KERN_SUCCESS;
@@ -10183,7 +10183,7 @@ pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t 
va, int ftype)
pmap_ps_enabled(pmap) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
-   pmap_promote_pde(pmap, pde, va, &lock);
+   pmap_promote_pde(pmap, pde, va, mpte, &lock);
 #ifdef INVARIANTS
atomic_add_long(&ad_emulation_superpage_promotions, 1);
 #endif
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index deea00bc5d13..c86e9f562729 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -3787,18 +3787,15 @@ pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, 
vm_paddr_t pa,
  * identical characteristics.
  */
 static void
-pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
+pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,
 struct rwlock **lockp)
 {
pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
-   vm_page_t mpte;
-   vm_offset_t sva;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
 
-   sva = va & ~L2_OFFSET;
-   firstl3 = pmap_l2_to_l3(l2, sva);
+   firstl3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
newl2 = pmap_load(firstl3);
 
if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF ||
@@ -3851,7 +3848,8 @@ setl3:
 * mapping the superpage is demoted by pmap_demote_l2() or
 * destroyed by pmap_remove_l3().
 */
-   mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
+   if (mpte == NULL)
+   mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
KASSERT(mpte >= vm_page_array &&
mpte < &vm_page_array[vm_page_array_size],
("pmap_promote_l2: page table page is out of range"));
@@ -3871,7 +3869,7 @@ setl3:
newl2 &am

git: 1d5ebad06c20 - main - pmap: optimize MADV_WILLNEED on existing superpages

2022-09-30 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=1d5ebad06c20b1aed3b0c323c4675678afec5e55

commit 1d5ebad06c20b1aed3b0c323c4675678afec5e55
Author: Alan Cox 
AuthorDate: 2022-09-30 06:54:02 +
Commit: Alan Cox 
CommitDate: 2022-09-30 17:14:05 +

pmap: optimize MADV_WILLNEED on existing superpages

Specifically, avoid pointless calls to pmap_enter_quick_locked() when
madvise(MADV_WILLNEED) is applied to an existing superpage mapping.

Reported by:mhorne
Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D36801
---
 sys/amd64/amd64/pmap.c | 64 +++---
 sys/arm64/arm64/pmap.c | 59 +++---
 2 files changed, 75 insertions(+), 48 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index f4df664f0cca..b9b031d55d7d 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1258,7 +1258,7 @@ static boolean_t pmap_demote_pde_locked(pmap_t pmap, 
pd_entry_t *pde,
 vm_offset_t va, struct rwlock **lockp);
 static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
 vm_offset_t va);
-static boolpmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
+static int pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot, struct rwlock **lockp);
 static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
u_int flags, vm_page_t m, struct rwlock **lockp);
@@ -7271,13 +7271,12 @@ out:
 }
 
 /*
- * Tries to create a read- and/or execute-only 2MB page mapping.  Returns true
- * if successful.  Returns false if (1) a page table page cannot be allocated
- * without sleeping, (2) a mapping already exists at the specified virtual
- * address, or (3) a PV entry cannot be allocated without reclaiming another
- * PV entry.
+ * Tries to create a read- and/or execute-only 2MB page mapping.  Returns
+ * KERN_SUCCESS if the mapping was created.  Otherwise, returns an error
+ * value.  See pmap_enter_pde() for the possible error values when "no sleep",
+ * "no replace", and "no reclaim" are specified.
  */
-static bool
+static int
 pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 struct rwlock **lockp)
 {
@@ -7295,8 +7294,7 @@ pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t 
m, vm_prot_t prot,
if (va < VM_MAXUSER_ADDRESS)
newpde |= PG_U;
return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP |
-   PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
-   KERN_SUCCESS);
+   PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp));
 }
 
 /*
@@ -7319,12 +7317,19 @@ pmap_every_pte_zero(vm_paddr_t pa)
 
 /*
  * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
- * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
- * otherwise.  Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and
- * a mapping already exists at the specified virtual address.  Returns
- * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table
- * page allocation failed.  Returns KERN_RESOURCE_SHORTAGE if
- * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
+ * the mapping was created, and one of KERN_FAILURE, KERN_NO_SPACE,
+ * KERN_PROTECTION_FAILURE, or KERN_RESOURCE_FAILURE otherwise.  Returns
+ * KERN_FAILURE if either (1) PMAP_ENTER_NOREPLACE was specified and a 4KB
+ * page mapping already exists within the 2MB virtual address range starting
+ * at the specified virtual address or (2) the requested 2MB page mapping is
+ * not supported due to hardware errata.  Returns KERN_NO_SPACE if
+ * PMAP_ENTER_NOREPLACE was specified and a 2MB page mapping already exists at
+ * the specified virtual address.  Returns KERN_PROTECTION_FAILURE if the PKRU
+ * settings are not the same across the 2MB virtual address range starting at
+ * the specified virtual address.  Returns KERN_RESOURCE_SHORTAGE if either
+ * (1) PMAP_ENTER_NOSLEEP was specified and a page table page allocation
+ * failed or (2) PMAP_ENTER_NORECLAIM was specified and a PV entry allocation
+ * failed.
  *
  * The parameter "m" is only used when creating a managed, writeable mapping.
  */
@@ -7380,14 +7385,23 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t 
newpde, u_int flags,
if ((oldpde & PG_V) != 0) {
KASSERT(pdpg == NULL || pdpg->ref_count > 1,
("pmap_enter_pde: pdpg's reference count is too low"));
-   if ((flags & PMAP_ENTER_NOREPLACE) != 0 && (va <
-   VM_MAXUSER_ADDRESS || (oldpde & PG_PS) != 0 ||
-   !pmap_every_pte_zero(oldpde & PG_FRAME))) 
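
One way callers exploit the finer-grained return values is sketched below:
both KERN_SUCCESS and KERN_NO_SPACE mean an entire 2MB range is covered, so
an MADV_WILLNEED-style walk can skip it instead of making one call per 4KB
page.  The constants, helpers, and the KERN_NO_SPACE value are assumptions
made for the example, not the kernel's definitions.

#include <stdint.h>
#include <stdio.h>

#define KERN_SUCCESS   0
#define KERN_NO_SPACE  3               /* illustrative value only */
#define NBPDR          (2UL << 20)     /* assume 2MB superpages */
#define PAGE_SIZE      4096UL

static unsigned long superpage_skips, small_entries;

static int
try_enter_2mpage(uint64_t va)
{
        (void)va;
        return (KERN_NO_SPACE);         /* pretend a 2MB mapping exists */
}

static void
enter_4kpage(uint64_t va)
{
        (void)va;
        small_entries++;
}

static void
populate(uint64_t start, uint64_t end)
{
        uint64_t va;
        int rv;

        for (va = start; va < end; va += PAGE_SIZE) {
                if ((va & (NBPDR - 1)) == 0 && end - va >= NBPDR &&
                    ((rv = try_enter_2mpage(va)) == KERN_SUCCESS ||
                    rv == KERN_NO_SPACE)) {
                        superpage_skips++;
                        va += NBPDR - PAGE_SIZE;   /* skip the 2MB range */
                        continue;
                }
                enter_4kpage(va);
        }
}

int
main(void)
{
        populate(0, 4 * NBPDR);
        printf("skipped %lu superpage ranges, entered %lu small pages\n",
            superpage_skips, small_entries);
        return (0);
}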

git: f0878da03b37 - main - pmap: standardize promotion conditions between amd64 and arm64

2022-12-12 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=f0878da03b374e3fa3578b363f02bfd50ac0e5bd

commit f0878da03b374e3fa3578b363f02bfd50ac0e5bd
Author: Alan Cox 
AuthorDate: 2022-10-08 07:20:25 +
Commit: Alan Cox 
CommitDate: 2022-12-12 17:32:50 +

pmap: standardize promotion conditions between amd64 and arm64

On amd64, don't abort promotion due to a missing accessed bit in a
mapping before possibly write protecting that mapping.  Previously,
in some cases, we might not repromote after madvise(MADV_FREE) because
there was no write fault to trigger the repromotion.  Conversely, on
arm64, don't pointlessly, yet harmlessly, write protect physical pages
that aren't part of the physical superpage.

Don't count aborted promotions due to explicit promotion prohibition
(arm64) or hardware errata (amd64) as ordinary promotion failures.

Reviewed by:kib, markj
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D36916
---
 sys/amd64/amd64/pmap.c | 37 ++---
 sys/arm64/arm64/pmap.c | 50 --
 2 files changed, 74 insertions(+), 13 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index eb8980ae4fed..a44993efb409 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6771,19 +6771,36 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, 
vm_offset_t va, vm_page_t mpte,
 
/*
 * Examine the first PTE in the specified PTP.  Abort if this PTE is
-* either invalid, unused, or does not map the first 4KB physical page
-* within a 2MB page. 
+* ineligible for promotion due to hardware errata, invalid, or does
+* not map the first 4KB physical page within a 2MB page.
 */
firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
newpde = *firstpte;
-   if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) ||
-   !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
-   newpde))) {
+   if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, newpde)))
+   return;
+   if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) {
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
return;
}
+
+   /*
+* Both here and in the below "for" loop, to allow for repromotion
+* after MADV_FREE, conditionally write protect a clean PTE before
+* possibly aborting the promotion due to other PTE attributes.  Why?
+* Suppose that MADV_FREE is applied to a part of a superpage, the
+* address range [S, E).  pmap_advise() will demote the superpage
+* mapping, destroy the 4KB page mapping at the end of [S, E), and
+* clear PG_M and PG_A in the PTEs for the rest of [S, E).  Later,
+* imagine that the memory in [S, E) is recycled, but the last 4KB
+* page in [S, E) is not the last to be rewritten, or simply accessed.
+* In other words, there is still a 4KB page in [S, E), call it P,
+* that is writeable but PG_M and PG_A are clear in P's PTE.  Unless
+* we write protect P before aborting the promotion, if and when P is
+* finally rewritten, there won't be a page fault to trigger
+* repromotion.
+*/
 setpde:
if ((newpde & (PG_M | PG_RW)) == PG_RW) {
/*
@@ -6794,16 +6811,22 @@ setpde:
goto setpde;
newpde &= ~PG_RW;
}
+   if ((newpde & PG_A) == 0) {
+   counter_u64_add(pmap_pde_p_failures, 1);
+   CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
+   " in pmap %p", va, pmap);
+   return;
+   }
 
/*
 * Examine each of the other PTEs in the specified PTP.  Abort if this
 * PTE maps an unexpected 4KB physical page or does not have identical
 * characteristics to the first PTE.
 */
-   pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
+   pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE;
for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
oldpte = *pte;
-   if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
+   if ((oldpte & (PG_FRAME | PG_V)) != pa) {
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 3f46
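
The "write protect a clean PTE before possibly aborting" step described above
boils down to an atomic downgrade: if the entry is writable but not dirty, it
is made read-only so that a later write faults and can trigger repromotion.
The sketch below uses a C11 atomic and invented bit names in place of the
pmap's compare-and-set loop.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define PTE_RW 0x2UL    /* illustrative bit assignments */
#define PTE_M  0x4UL
#define PTE_A  0x8UL

static bool
setpde_like_step(_Atomic uint64_t *pte)
{
        uint64_t old = atomic_load(pte);

        /* Clean but writable: retry until the write permission is gone. */
        while ((old & (PTE_M | PTE_RW)) == PTE_RW) {
                if (atomic_compare_exchange_weak(pte, &old, old & ~PTE_RW))
                        old &= ~PTE_RW;
        }
        /* Only now give up if the entry was never accessed. */
        return ((old & PTE_A) != 0);
}

int
main(void)
{
        _Atomic uint64_t pte = PTE_RW | PTE_A;  /* clean, writable, accessed */

        return (setpde_like_step(&pte) &&
            (atomic_load(&pte) & PTE_RW) == 0 ? 0 : 1);
}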

git: 4ccd6c137f5b - main - arm64: Implement final level only TLB invalidations

2022-01-03 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4ccd6c137f5b53361efe54b78b815c7902258572

commit 4ccd6c137f5b53361efe54b78b815c7902258572
Author: Alan Cox 
AuthorDate: 2021-12-29 07:50:05 +
Commit: Alan Cox 
CommitDate: 2022-01-03 19:14:18 +

arm64: Implement final level only TLB invalidations

A feature of arm64's instruction for TLB invalidation is the ability
to determine whether cached intermediate entries, i.e., L{0,1,2}_TABLE
entries, are invalidated in addition to the final entry, e.g., an
L3_PAGE entry.

Update pmap_invalidate_{page,range}() to support both types of
invalidation, allowing the caller to determine which type of
invalidation is performed.

Update the callers to request the appropriate type of invalidation.

Eliminate redundant TLB invalidations in pmap_abort_ptp() and
pmap_remove_l3_range().

Add a comment to pmap_invalidate_all() making clear that it always
invalidates entries at all levels.

As expected, these changes result in a tiny yet measurable
performance improvement.

Reviewed by:kib, markj
MFC after:  3 weeks
Differential Revision:  https://reviews.freebsd.org/D33705
---
 sys/arm64/arm64/pmap.c | 144 +++--
 1 file changed, 92 insertions(+), 52 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 68164708dce9..130d4a255286 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1223,10 +1223,35 @@ SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, 
CTLFLAG_RD,
 &pmap_l2_promotions, 0, "2MB page promotions");
 
 /*
- * Invalidate a single TLB entry.
+ * If the given value for "final_only" is false, then any cached intermediate-
+ * level entries, i.e., L{0,1,2}_TABLE entries, are invalidated in addition to
+ * any cached final-level entry, i.e., either an L{1,2}_BLOCK or L3_PAGE entry.
+ * Otherwise, just the cached final-level entry is invalidated.
  */
 static __inline void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+pmap_invalidate_kernel(uint64_t r, bool final_only)
+{
+   if (final_only)
+   __asm __volatile("tlbi vaale1is, %0" : : "r" (r));
+   else
+   __asm __volatile("tlbi vaae1is, %0" : : "r" (r));
+}
+
+static __inline void
+pmap_invalidate_user(uint64_t r, bool final_only)
+{
+   if (final_only)
+   __asm __volatile("tlbi vale1is, %0" : : "r" (r));
+   else
+   __asm __volatile("tlbi vae1is, %0" : : "r" (r));
+}
+
+/*
+ * Invalidates any cached final- and optionally intermediate-level TLB entries
+ * for the specified virtual address in the given virtual address space.
+ */
+static __inline void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only)
 {
uint64_t r;
 
@@ -1235,17 +1260,22 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
dsb(ishst);
if (pmap == kernel_pmap) {
r = atop(va);
-   __asm __volatile("tlbi vaae1is, %0" : : "r" (r));
+   pmap_invalidate_kernel(r, final_only);
} else {
r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) | atop(va);
-   __asm __volatile("tlbi vae1is, %0" : : "r" (r));
+   pmap_invalidate_user(r, final_only);
}
dsb(ish);
isb();
 }
 
+/*
+ * Invalidates any cached final- and optionally intermediate-level TLB entries
+ * for the specified virtual address range in the given virtual address space.
+ */
 static __inline void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+bool final_only)
 {
uint64_t end, r, start;
 
@@ -1256,18 +1286,22 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, 
vm_offset_t eva)
start = atop(sva);
end = atop(eva);
for (r = start; r < end; r++)
-   __asm __volatile("tlbi vaae1is, %0" : : "r" (r));
+   pmap_invalidate_kernel(r, final_only);
} else {
start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
start |= atop(sva);
end |= atop(eva);
for (r = start; r < end; r++)
-   __asm __volatile("tlbi vae1is, %0" : : "r" (r));
+   pmap_invalidate_user(r, final_only);
}
dsb(ish);
isb();
 }
 
+/*
+ * Invalidates all cached intermediate- and final-level TLB entries for the
+ * given virtual address space.
+ */
 static __inline void
 pmap_invalidate_all(pmap_t pmap)
 {
@@ -1513,7 +1547,7 @
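
As a concrete illustration (not an exhaustive list of call sites from the
commit), a caller chooses between the two flavors roughly like this:

/*
 * When only a final-level entry changed -- e.g., an L3_PAGE entry was
 * removed while its containing L2_TABLE entry is left intact -- the
 * cheaper, final-level-only invalidation suffices:
 */
pmap_invalidate_page(pmap, va, true);

/*
 * When an intermediate entry changed as well -- e.g., a page table page
 * was freed, so cached table-walk entries may be stale -- request
 * invalidation at all levels:
 */
pmap_invalidate_range(pmap, sva, eva, false);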

git: 0c188c06c627 - main - arm64: replace pa_to_pvh() with page_to_pvh() in pmap_remove_l2()

2021-06-23 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=0c188c06c627b5de30b7cde00d071a80ecfa

commit 0c188c06c627b5de30b7cde00d071a80ecfa
Author: Alan Cox 
AuthorDate: 2021-06-23 19:14:31 +
Commit: Alan Cox 
CommitDate: 2021-06-24 03:35:46 +

arm64: replace pa_to_pvh() with page_to_pvh() in pmap_remove_l2()

Revise pmap_remove_l2() to use the constant-time function page_to_pvh()
instead of the linear-time function pa_to_pvh().

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D30876
---
 sys/arm64/arm64/pmap.c | 18 --
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 7def96bca70b..bc3d4fd6446b 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -2834,8 +2834,7 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t 
sva,
 {
struct md_page *pvh;
pt_entry_t old_l2;
-   vm_offset_t eva, va;
-   vm_page_t m, ml3;
+   vm_page_t m, ml3, mt;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned"));
@@ -2853,19 +2852,18 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t 
sva,
pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE;
pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE);
if (old_l2 & ATTR_SW_MANAGED) {
+   m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK);
+   pvh = page_to_pvh(m);
CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, old_l2 & ~ATTR_MASK);
-   pvh = pa_to_pvh(old_l2 & ~ATTR_MASK);
pmap_pvh_free(pvh, pmap, sva);
-   eva = sva + L2_SIZE;
-   for (va = sva, m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK);
-   va < eva; va += PAGE_SIZE, m++) {
+   for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) {
if (pmap_pte_dirty(pmap, old_l2))
-   vm_page_dirty(m);
+   vm_page_dirty(mt);
if (old_l2 & ATTR_AF)
-   vm_page_aflag_set(m, PGA_REFERENCED);
-   if (TAILQ_EMPTY(&m->md.pv_list) &&
+   vm_page_aflag_set(mt, PGA_REFERENCED);
+   if (TAILQ_EMPTY(&mt->md.pv_list) &&
TAILQ_EMPTY(&pvh->pv_list))
-   vm_page_aflag_clear(m, PGA_WRITEABLE);
+   vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
}
if (pmap == kernel_pmap) {
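
The following is a rough, hypothetical illustration of the complexity
difference; it is not the real pa_to_pvh()/page_to_pvh() code, and the
per-segment array "pvh_first" exists only for the sketch.  The point is
that a physical address must be matched against the physical segments one
by one, whereas a vm_page already carries its segment index.

/* Hypothetical per-segment arrays of 2MB pv headers. */
static struct md_page *pvh_first[VM_PHYSSEG_MAX];

static struct md_page *
pa_to_pvh_sketch(vm_paddr_t pa)
{
        int i;

        /* Linear in the number of physical segments. */
        for (i = 0; i < vm_phys_nsegs; i++)
                if (pa >= vm_phys_segs[i].start && pa < vm_phys_segs[i].end)
                        return (&pvh_first[i][(pa -
                            vm_phys_segs[i].start) >> L2_SHIFT]);
        return (NULL);
}

static struct md_page *
page_to_pvh_sketch(vm_page_t m)
{
        /* Constant time: m->segind identifies the segment directly. */
        return (&pvh_first[m->segind][(VM_PAGE_TO_PHYS(m) -
            vm_phys_segs[m->segind].start) >> L2_SHIFT]);
}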


git: c94249decd16 - main - arm64: make it possible to define PV_STATS

2021-06-24 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=c94249decd16de71a00d837ee132954d9f259e49

commit c94249decd16de71a00d837ee132954d9f259e49
Author: Alan Cox 
AuthorDate: 2021-06-24 23:09:23 +
Commit: Alan Cox 
CommitDate: 2021-06-24 23:32:56 +

arm64: make it possible to define PV_STATS

Remove an #if 0 that results in a compilation error if PV_STATS is
defined.  Aside from this #if 0, there is nothing wrong with the
PV_STATS code.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index bc3d4fd6446b..a6f716370810 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -2226,7 +2226,6 @@ pv_to_chunk(pv_entry_t pv)
 
 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
 
-#if 0
 #ifdef PV_STATS
 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 
@@ -2251,7 +2250,6 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, 
CTLFLAG_RD, &pv_entry_count, 0,
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
"Current number of spare pv entries");
 #endif
-#endif /* 0 */
 
 /*
  * We are in a serious low memory condition.  Resort to


git: 5dd84e315a9f - main - arm64: fix a potential KVA leak in pmap_demote_l1()

2021-06-25 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5dd84e315a9f72017f9f628aa67f08a6493a

commit 5dd84e315a9f72017f9f628aa67f08a6493a
Author: Alan Cox 
AuthorDate: 2021-06-26 03:29:38 +
Commit: Alan Cox 
CommitDate: 2021-06-26 04:01:32 +

arm64: fix a potential KVA leak in pmap_demote_l1()

In the unlikely event that the 1 GB page mapping being demoted is used
to access the L1 page table page containing the 1 GB page mapping and
the vm_page_alloc() to allocate a new L2 page table page fails, we
would leak a page of kernel virtual address space.  Fix this leak.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index a6f716370810..76ca8eab70ff 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -6010,7 +6010,8 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t 
va)
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx"
" in pmap %p", va, pmap);
-   return (NULL);
+   l2 = NULL;
+   goto fail;
}
 
l2phys = VM_PAGE_TO_PHYS(ml2);
@@ -6039,6 +6040,7 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t 
va)
 
pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE);
 
+fail:
if (tmpl1 != 0) {
pmap_kremove(tmpl1);
kva_free(tmpl1, PAGE_SIZE);
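
Reduced to a sketch, the shape of the fix is the usual single-cleanup-label
idiom.  The helper name below is hypothetical; only the tmpl1 handling
mirrors the committed code.

static vm_page_t
demote_l1_sketch(pmap_t pmap, vm_offset_t va, vm_offset_t tmpl1)
{
        vm_page_t ml2;

        /* tmpl1 may be 0 if no temporary KVA was needed. */
        ml2 = alloc_l2_ptp(pmap, va);   /* hypothetical allocation */
        if (ml2 == NULL)
                goto fail;      /* previously: return (NULL), leaking tmpl1 */

        /* ... demote the 1 GB mapping, possibly through tmpl1 ... */

fail:
        if (tmpl1 != 0) {
                pmap_kremove(tmpl1);
                kva_free(tmpl1, PAGE_SIZE);
        }
        return (ml2);
}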


git: 19c288b3a664 - main - arm64: eliminate a duplicated #define

2021-06-26 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=19c288b3a6640742ab45200031661fe5be710d7f

commit 19c288b3a6640742ab45200031661fe5be710d7f
Author: Alan Cox 
AuthorDate: 2021-06-27 06:40:23 +
Commit: Alan Cox 
CommitDate: 2021-06-27 06:44:58 +

arm64: eliminate a duplicated #define
---
 sys/arm64/include/pte.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sys/arm64/include/pte.h b/sys/arm64/include/pte.h
index 16a72be65fd0..5d4412d2c141 100644
--- a/sys/arm64/include/pte.h
+++ b/sys/arm64/include/pte.h
@@ -131,7 +131,6 @@ typedef uint64_tpt_entry_t; /* page 
table entry */
 #defineL3_SHIFT12
 #defineL3_SIZE (1 << L3_SHIFT)
 #defineL3_OFFSET   (L3_SIZE - 1)
-#defineL3_SHIFT12
 #defineL3_INVAL0x0
/* 0x1 is reserved */
/* 0x2 also marks an invalid address */


git: 26a357245f21 - main - arm64: a few simplifications to pmap_remove_{all,write}

2021-06-28 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=26a357245f2197eea4dbbae0956d5c71ef8ba4f1

commit 26a357245f2197eea4dbbae0956d5c71ef8ba4f1
Author: Alan Cox 
AuthorDate: 2021-06-29 02:57:04 +
Commit: Alan Cox 
CommitDate: 2021-06-29 03:21:24 +

arm64: a few simplifications to pmap_remove_{all,write}

Eliminate some unnecessary unlocking and relocking when we have to retry
the operation to avoid deadlock.  (All of the other pmap functions that
iterate over a PV list already implemented retries without these same
unlocking and relocking operations.)

Avoid a pointer dereference by using an existing local variable that
already holds the desired value.

Eliminate some unnecessary repetition of code on a failed fcmpset.
Specifically, there is no point in retesting the DBM bit because it
cannot change state while the pmap lock is held.

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D30931
---
 sys/arm64/arm64/pmap.c | 20 +++-
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 76ca8eab70ff..79b9d20231aa 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -3130,8 +3130,8 @@ pmap_remove_all(vm_page_t m)
SLIST_INIT(&free);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : page_to_pvh(m);
-retry:
rw_wlock(lock);
+retry:
while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
@@ -3140,7 +3140,6 @@ retry:
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen) {
-   rw_wunlock(lock);
PMAP_UNLOCK(pmap);
goto retry;
}
@@ -3151,7 +3150,6 @@ retry:
("pmap_remove_all: no page table entry found"));
KASSERT(lvl == 2,
("pmap_remove_all: invalid pte level %d", lvl));
-
pmap_demote_l2_locked(pmap, pte, va, &lock);
PMAP_UNLOCK(pmap);
}
@@ -3165,7 +3163,6 @@ retry:
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
-   rw_wunlock(lock);
PMAP_UNLOCK(pmap);
goto retry;
}
@@ -5224,8 +5221,8 @@ pmap_remove_write(vm_page_t m)
return;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : page_to_pvh(m);
-retry_pv_loop:
rw_wlock(lock);
+retry:
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
PMAP_ASSERT_STAGE1(pmap);
@@ -5236,12 +5233,11 @@ retry_pv_loop:
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
-   rw_wunlock(lock);
-   goto retry_pv_loop;
+   goto retry;
}
}
va = pv->pv_va;
-   pte = pmap_pte(pmap, pv->pv_va, &lvl);
+   pte = pmap_pte(pmap, va, &lvl);
if ((pmap_load(pte) & ATTR_SW_DBM) != 0)
(void)pmap_demote_l2_locked(pmap, pte, va, &lock);
KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
@@ -5261,17 +5257,15 @@ retry_pv_loop:
if (pvh_gen != pvh->pv_gen ||
md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
-   rw_wunlock(lock);
-   goto retry_pv_loop;
+   goto retry;
}
}
pte = pmap_pte(pmap, pv->pv_va, &lvl);
oldpte = pmap_load(pte);
-retry:
if ((oldpte & ATTR_SW_DBM) != 0) {
-   if (!atomic_fcmpset_long(pte, &oldpte,
+   while (!atomic_fcmpset_64(pte, &oldpte,
(oldpte | ATTR_S1_AP_RW_BIT) & ~ATTR_SW_DBM))
-   goto retry;
+   cpu_spinwait();
if ((oldpte & ATTR_S1_AP_RW_BIT) ==
ATTR_S1_AP(ATTR_S1_AP_RW))
vm_page_dirty(m);
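
In schematic form (condensed, not the literal committed code), the retry
loop after this change keeps the pv list lock across a generation-count
mismatch instead of dropping and immediately reacquiring it:

        rw_wlock(lock);
retry:
        while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
                pmap = PV_PMAP(pv);
                if (!PMAP_TRYLOCK(pmap)) {
                        pvh_gen = pvh->pv_gen;
                        rw_wunlock(lock);       /* honor the lock order */
                        PMAP_LOCK(pmap);
                        rw_wlock(lock);
                        if (pvh_gen != pvh->pv_gen) {
                                /* List changed; rescan, lock still held. */
                                PMAP_UNLOCK(pmap);
                                goto retry;
                        }
                }
                /* ... remove the mapping under both locks ... */
                PMAP_UNLOCK(pmap);
        }
        rw_wunlock(lock);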

git: 1a8bcf30f97e - main - amd64: a simplification to pmap_remove_{all,write}

2021-06-30 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=1a8bcf30f97e6153def2af781db2fe54f5c0d106

commit 1a8bcf30f97e6153def2af781db2fe54f5c0d106
Author: Alan Cox 
AuthorDate: 2021-06-30 05:59:21 +
Commit: Alan Cox 
CommitDate: 2021-06-30 18:12:25 +

amd64: a simplification to pmap_remove_{all,write}

Eliminate some unnecessary unlocking and relocking when we have to retry
the operation to avoid deadlock.  (All of the other pmap functions that
iterate over a PV list already implemented retries without these same
unlocking and relocking operations.)

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D30951
---
 sys/amd64/amd64/pmap.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index a4046cc1f687..ea017b8a61a8 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6307,8 +6307,8 @@ pmap_remove_all(vm_page_t m)
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
-retry:
rw_wlock(lock);
+retry:
while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
@@ -6317,7 +6317,6 @@ retry:
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen) {
-   rw_wunlock(lock);
PMAP_UNLOCK(pmap);
goto retry;
}
@@ -6336,7 +6335,6 @@ retry:
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
-   rw_wunlock(lock);
PMAP_UNLOCK(pmap);
goto retry;
}
@@ -8460,8 +8458,8 @@ pmap_remove_write(vm_page_t m)
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
-retry_pv_loop:
rw_wlock(lock);
+retry_pv_loop:
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
@@ -8471,7 +8469,6 @@ retry_pv_loop:
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
-   rw_wunlock(lock);
goto retry_pv_loop;
}
}
@@ -8496,7 +8493,6 @@ retry_pv_loop:
if (pvh_gen != pvh->pv_gen ||
md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
-   rw_wunlock(lock);
goto retry_pv_loop;
}
}


git: e41fde3ed71c - main - On a failed fcmpset don't pointlessly repeat tests

2021-07-05 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e41fde3ed71c1e4fce81eac002c9f5b0926e6c49

commit e41fde3ed71c1e4fce81eac002c9f5b0926e6c49
Author: Alan Cox 
AuthorDate: 2021-07-04 05:20:42 +
Commit: Alan Cox 
CommitDate: 2021-07-06 02:07:40 +

On a failed fcmpset don't pointlessly repeat tests

In a few places, on a failed compare-and-set, both the amd64 pmap and
the arm64 pmap repeat tests on bits that won't change state while the
pmap is locked.  Eliminate some of these unnecessary tests.

Reviewed by:andrew, kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D31014
---
 sys/amd64/amd64/pmap.c | 11 +--
 sys/arm64/arm64/pmap.c | 15 ---
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index ea017b8a61a8..5e0b6d76ae0a 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -8459,7 +8459,7 @@ pmap_remove_write(vm_page_t m)
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
rw_wlock(lock);
-retry_pv_loop:
+retry:
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
@@ -8469,7 +8469,7 @@ retry_pv_loop:
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
-   goto retry_pv_loop;
+   goto retry;
}
}
PG_RW = pmap_rw_bit(pmap);
@@ -8493,7 +8493,7 @@ retry_pv_loop:
if (pvh_gen != pvh->pv_gen ||
md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
-   goto retry_pv_loop;
+   goto retry;
}
}
PG_M = pmap_modified_bit(pmap);
@@ -8503,12 +8503,11 @@ retry_pv_loop:
("pmap_remove_write: found a 2mpage in page %p's pv list",
m));
pte = pmap_pde_to_pte(pde, pv->pv_va);
-retry:
oldpte = *pte;
if (oldpte & PG_RW) {
-   if (!atomic_cmpset_long(pte, oldpte, oldpte &
+   while (!atomic_fcmpset_long(pte, &oldpte, oldpte &
~(PG_RW | PG_M)))
-   goto retry;
+   cpu_spinwait();
if ((oldpte & PG_M) != 0)
vm_page_dirty(m);
pmap_invalidate_page(pmap, pv->pv_va);
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 79b9d20231aa..bf476490b6be 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -3223,10 +3223,12 @@ pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, 
vm_offset_t sva, pt_entry_t mask,
 * Return if the L2 entry already has the desired access restrictions
 * in place.
 */
-retry:
if ((old_l2 & mask) == nbits)
return;
 
+   while (!atomic_fcmpset_64(l2, &old_l2, (old_l2 & ~mask) | nbits))
+   cpu_spinwait();
+
/*
 * When a dirty read/write superpage mapping is write protected,
 * update the dirty field of each of the superpage's constituent 4KB
@@ -3240,9 +3242,6 @@ retry:
vm_page_dirty(mt);
}
 
-   if (!atomic_fcmpset_64(l2, &old_l2, (old_l2 & ~mask) | nbits))
-   goto retry;
-
/*
 * Since a promotion must break the 4KB page mappings before making
 * the 2MB page mapping, a pmap_invalidate_page() suffices.
@@ -3334,7 +3333,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t 
eva, vm_prot_t prot)
for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
sva += L3_SIZE) {
l3 = pmap_load(l3p);
-retry:
+
/*
 * Go to the next L3 entry if the current one is
 * invalid or already has the desired access
@@ -3351,6 +3350,10 @@ retry:
continue;
}
 
+   while (!atomic_fcmpset_64(l3p, &l3, (l3 & ~mask) |
+   nbits))
+   cpu_spinwait();
+
/*
 * When a dirty read/write mapping is write protected,
 * update the page's dirty field.
@@ -3360,8 +3363,6 @@ retry:
pmap_pte_dirty(pmap, l3))
vm_page_dirt
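
The tightened loops rely on the fcmpset semantics: unlike cmpset, a failed
fcmpset hands back the value it found, so the caller need not re-read the
PTE or re-derive state that cannot change while the pmap lock is held.  A
standalone C11 illustration of the same semantics (the PG_RW/PG_M values
match amd64; atomic_compare_exchange_strong() behaves like
atomic_fcmpset_long()):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PG_RW   0x002
#define PG_M    0x040

int
main(void)
{
        _Atomic uint64_t pte = PG_RW;           /* clean, writeable mapping */
        uint64_t oldpte = atomic_load(&pte);

        /* On failure, oldpte is refreshed; no need to reload or retest. */
        while (!atomic_compare_exchange_strong(&pte, &oldpte,
            oldpte & ~(uint64_t)(PG_RW | PG_M)))
                ;       /* in the kernel: cpu_spinwait() */
        printf("pte %#jx oldpte %#jx\n", (uintmax_t)atomic_load(&pte),
            (uintmax_t)oldpte);
        return (0);
}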

git: 0add3c9945c8 - main - arm64: Simplify fcmpset failure in pmap_promote_l2()

2021-07-07 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=0add3c9945c85c7f766f9225866e99e2a805819b

commit 0add3c9945c85c7f766f9225866e99e2a805819b
Author: Alan Cox 
AuthorDate: 2021-07-07 18:16:03 +
Commit: Alan Cox 
CommitDate: 2021-07-07 18:34:11 +

arm64: Simplify fcmpset failure in pmap_promote_l2()

When the initial fcmpset in pmap_promote_l2() fails, there is no need
to repeat the check for the physical address being 2MB aligned or for
the accessed bit being set.  While the pmap is locked the hardware can
only transition the accessed bit from 0 to 1, and we have already
determined that it is 1 when the fcmpset fails.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index bf476490b6be..7758a84d81d5 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -3502,7 +3502,6 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t 
va,
firstl3 = pmap_l2_to_l3(l2, sva);
newl2 = pmap_load(firstl3);
 
-setl2:
if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF) {
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
@@ -3510,6 +3509,7 @@ setl2:
return;
}
 
+setl2:
if ((newl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
/*


git: d411b285bc29 - main - pmap: Micro-optimize pmap_remove_pages() on amd64 and arm64

2021-07-13 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=d411b285bc293a37e062d8fb15b85212ce16abab

commit d411b285bc293a37e062d8fb15b85212ce16abab
Author: Alan Cox 
AuthorDate: 2021-07-12 23:25:37 +
Commit: Alan Cox 
CommitDate: 2021-07-13 22:33:23 +

pmap: Micro-optimize pmap_remove_pages() on amd64 and arm64

Reduce the live ranges for three variables so that they do not span the
call to PHYS_TO_VM_PAGE().  This enables the compiler to generate
slightly smaller machine code.

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D31161
---
 sys/amd64/amd64/pmap.c | 14 ++
 sys/arm64/arm64/pmap.c | 19 ++-
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index f8bd17dc6238..31681e255af1 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -8201,6 +8201,16 @@ pmap_remove_pages(pmap_t pmap)
continue;
}
 
+   /* Mark free */
+   pc->pc_map[field] |= bitmask;
+
+   /*
+* Because this pmap is not active on other
+* processors, the dirty bit cannot have
+* changed state since we last loaded pte.
+*/
+   pte_clear(pte);
+
if (superpage)
pa = tpte & PG_PS_FRAME;
else
@@ -8217,8 +8227,6 @@ pmap_remove_pages(pmap_t pmap)
("pmap_remove_pages: bad tpte %#jx",
(uintmax_t)tpte));
 
-   pte_clear(pte);
-
/*
 * Update the vm_page_t clean/reference bits.
 */
@@ -8232,8 +8240,6 @@ pmap_remove_pages(pmap_t pmap)
 
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 
-   /* Mark free */
-   pc->pc_map[field] |= bitmask;
if (superpage) {
pmap_resident_count_adj(pmap, -NBPDR / 
PAGE_SIZE);
pvh = pa_to_pvh(tpte & PG_PS_FRAME);
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 7758a84d81d5..8ed1b86bd58c 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -4951,6 +4951,16 @@ pmap_remove_pages(pmap_t pmap)
continue;
}
 
+   /* Mark free */
+   pc->pc_map[field] |= bitmask;
+
+   /*
+* Because this pmap is not active on other
+* processors, the dirty bit cannot have
+* changed state since we last loaded pte.
+*/
+   pmap_clear(pte);
+
pa = tpte & ~ATTR_MASK;
 
m = PHYS_TO_VM_PAGE(pa);
@@ -4964,13 +4974,6 @@ pmap_remove_pages(pmap_t pmap)
("pmap_remove_pages: bad pte %#jx",
(uintmax_t)tpte));
 
-   /*
-* Because this pmap is not active on other
-* processors, the dirty bit cannot have
-* changed state since we last loaded pte.
-*/
-   pmap_clear(pte);
-
/*
 * Update the vm_page_t clean/reference bits.
 */
@@ -4988,8 +4991,6 @@ pmap_remove_pages(pmap_t pmap)
 
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 
-   /* Mark free */
-   pc->pc_map[field] |= bitmask;
switch (lvl) {
case 1:
pmap_resident_count_dec(pmap,
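
Schematically (not the committed code), the reordering simply finishes all
uses of "field", "bitmask", and "pte" before the call, so the compiler does
not have to keep them in callee-saved registers, or spill them, across it:

        /* Uses moved above the call: */
        pc->pc_map[field] |= bitmask;   /* mark the pv entry free */
        pte_clear(pte);                 /* dirty bit is stable: pmap inactive */

        /* The call the variables previously stayed live across: */
        m = PHYS_TO_VM_PAGE(pa);

        /* ... the rest of the loop body uses only m, tpte, ... */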


git: 325ff9327459 - main - Clear the accessed bit when copying a managed superpage mapping

2021-07-14 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=325ff9327459bc7307130675fa19367ff8b02310

commit 325ff9327459bc7307130675fa19367ff8b02310
Author: Alan Cox 
AuthorDate: 2021-07-13 07:30:43 +
Commit: Alan Cox 
CommitDate: 2021-07-14 18:06:10 +

Clear the accessed bit when copying a managed superpage mapping

pmap_copy() is used to speculatively create mappings, so those mappings
should not have their access bit preset.

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D31162
---
 sys/amd64/amd64/pmap.c | 17 -
 sys/arm64/arm64/pmap.c | 18 --
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 31681e255af1..427fbdf44830 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -7775,6 +7775,9 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t 
dst_addr, vm_size_t len,
continue;

if (srcptepaddr & PG_PS) {
+   /*
+* We can only virtual copy whole superpages.
+*/
if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr)
continue;
pde = pmap_alloc_pde(dst_pmap, addr, &dst_pdpg, NULL);
@@ -7783,7 +7786,19 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t 
dst_addr, vm_size_t len,
if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 ||
pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr,
PMAP_ENTER_NORECLAIM, &lock))) {
-   *pde = srcptepaddr & ~PG_W;
+   /*
+* We leave the dirty bit unchanged because
+* managed read/write superpage mappings are
+* required to be dirty.  However, managed
+* superpage mappings are not required to
+* have their accessed bit set, so we clear
+* it because we don't know if this mapping
+* will be used.
+*/
+   srcptepaddr &= ~PG_W;
+   if ((srcptepaddr & PG_MANAGED) != 0)
+   srcptepaddr &= ~PG_A;
+   *pde = srcptepaddr;
pmap_resident_count_adj(dst_pmap, NBPDR /
PAGE_SIZE);
counter_u64_add(pmap_pde_mappings, 1);
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 8ed1b86bd58c..678feae55c25 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -4557,6 +4557,9 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t 
dst_addr, vm_size_t len,
if (srcptepaddr == 0)
continue;
if ((srcptepaddr & ATTR_DESCR_MASK) == L2_BLOCK) {
+   /*
+* We can only virtual copy whole superpages.
+*/
if ((addr & L2_OFFSET) != 0 ||
addr + L2_SIZE > end_addr)
continue;
@@ -4567,8 +4570,19 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t 
dst_addr, vm_size_t len,
((srcptepaddr & ATTR_SW_MANAGED) == 0 ||
pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr,
PMAP_ENTER_NORECLAIM, &lock))) {
-   mask = ATTR_SW_WIRED;
-   pmap_store(l2, srcptepaddr & ~mask);
+   /*
+* We leave the dirty bit unchanged because
+* managed read/write superpage mappings are
+* required to be dirty.  However, managed
+* superpage mappings are not required to
+* have their accessed bit set, so we clear
+* it because we don't know if this mapping
+* will be used.
+*/
+   srcptepaddr &= ~ATTR_SW_WIRED;
+   if ((srcptepaddr & ATTR_SW_MANAGED) != 0)
+   srcptepaddr &= ~ATTR_AF;
+   pmap_store(l2, srcptepaddr);
pmap_resident_count_inc(dst_pmap, L2_SIZE /

git: 7fb152d22935 - main - arm64: Sync icache when creating executable superpage mappings

2021-07-15 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7fb152d22935e014afcad4ddc0b3a7e3c2795762

commit 7fb152d22935e014afcad4ddc0b3a7e3c2795762
Author: Alan Cox 
AuthorDate: 2021-07-14 17:59:49 +
Commit: Alan Cox 
CommitDate: 2021-07-15 22:34:54 +

arm64: Sync icache when creating executable superpage mappings

Reviewed by:andrew, kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D31181
---
 sys/arm64/arm64/pmap.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 190b56285e76..13941f4f61ea 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -4041,7 +4041,7 @@ pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t 
m, vm_prot_t prot,
if (pmap != kernel_pmap)
new_l2 |= ATTR_S1_nG;
return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP |
-   PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
+   PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, m, lockp) ==
KERN_SUCCESS);
 }
 
@@ -4071,8 +4071,6 @@ pmap_every_pte_zero(vm_paddr_t pa)
  * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table
  * page allocation failed.  Returns KERN_RESOURCE_SHORTAGE if
  * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
- *
- * The parameter "m" is only used when creating a managed, writeable mapping.
  */
 static int
 pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
@@ -4159,6 +4157,16 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
new_l2, u_int flags,
pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE;
pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE;
 
+   /*
+* Conditionally sync the icache.  See pmap_enter() for details.
+*/
+   if ((new_l2 & ATTR_S1_XN) == 0 && ((new_l2 & ~ATTR_MASK) !=
+   (old_l2 & ~ATTR_MASK) || (old_l2 & ATTR_S1_XN) != 0) &&
+   pmap != kernel_pmap && m->md.pv_memattr == VM_MEMATTR_WRITE_BACK) {
+   cpu_icache_sync_range(PHYS_TO_DMAP(new_l2 & ~ATTR_MASK),
+   L2_SIZE);
+   }
+
/*
 * Map the superpage.
 */


git: b7de53528836 - main - amd64: Eliminate a redundant test from pmap_enter_object()

2021-07-23 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=b7de535288362b072cf2801007e4d7e0e903d467

commit b7de535288362b072cf2801007e4d7e0e903d467
Author: Alan Cox 
AuthorDate: 2021-07-24 03:50:10 +
Commit: Alan Cox 
CommitDate: 2021-07-24 04:15:42 +

amd64: Eliminate a redundant test from pmap_enter_object()

The call to pmap_allow_2m_x_page() in pmap_enter_object() is redundant.
Specifically, even without the call to pmap_allow_2m_x_page() in
pmap_enter_object(), pmap_allow_2m_x_page() is eventually called by
pmap_enter_pde(), so the outcome will be the same.  Essentially,
calling pmap_allow_2m_x_page() in pmap_enter_object() amounts to
"optimizing" for the unexpected case.

Reviewed by:kib
MFC after:  1 week
---
 sys/amd64/amd64/pmap.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 427fbdf44830..688412594e6c 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -7334,7 +7334,6 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, 
vm_offset_t end,
va = start + ptoa(diff);
if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
m->psind == 1 && pmap_ps_enabled(pmap) &&
-   pmap_allow_2m_x_page(pmap, (prot & VM_PROT_EXECUTE) != 0) &&
pmap_enter_2mpage(pmap, va, m, prot, &lock))
m = &m[NBPDR / PAGE_SIZE - 1];
else


git: 3687797618b6 - main - amd64: Don't repeat unnecessary tests when cmpset fails

2021-07-24 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3687797618b6c978ad733bd206a623e5df47dbe3

commit 3687797618b6c978ad733bd206a623e5df47dbe3
Author: Alan Cox 
AuthorDate: 2021-07-24 08:50:27 +
Commit: Alan Cox 
CommitDate: 2021-07-24 18:06:47 +

amd64: Don't repeat unnecessary tests when cmpset fails

When a cmpset for removing the PG_RW bit in pmap_promote_pde() fails,
there is no need to repeat the alignment, PG_A, and PG_V tests just to
reload the PTE's value.  The only bit that we need be concerned with at
this point is PG_M.  Use fcmpset instead.

MFC after:  1 week
---
 sys/amd64/amd64/pmap.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index aea4394ebcc0..47315c560831 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6615,7 +6615,6 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, 
vm_offset_t va,
 * within a 2MB page. 
 */
firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
-setpde:
newpde = *firstpte;
if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) ||
!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
@@ -6625,12 +6624,13 @@ setpde:
" in pmap %p", va, pmap);
return;
}
+setpde:
if ((newpde & (PG_M | PG_RW)) == PG_RW) {
/*
 * When PG_M is already clear, PG_RW can be cleared without
 * a TLB invalidation.
 */
-   if (!atomic_cmpset_long(firstpte, newpde, newpde & ~PG_RW))
+   if (!atomic_fcmpset_long(firstpte, &newpde, newpde & ~PG_RW))
goto setpde;
newpde &= ~PG_RW;
}
@@ -6642,7 +6642,6 @@ setpde:
 */
pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
-setpte:
oldpte = *pte;
if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
counter_u64_add(pmap_pde_p_failures, 1);
@@ -6650,12 +6649,13 @@ setpte:
" in pmap %p", va, pmap);
return;
}
+setpte:
if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
/*
 * When PG_M is already clear, PG_RW can be cleared
 * without a TLB invalidation.
 */
-   if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW))
+   if (!atomic_fcmpset_long(pte, &oldpte, oldpte & ~PG_RW))
goto setpte;
oldpte &= ~PG_RW;
CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx"


git: fc2e4f15a904 - main - iommu_gas: Eliminate unnecessary wrappers

2022-06-16 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=fc2e4f15a9047bbf546cd675ed590b88e54362bd

commit fc2e4f15a9047bbf546cd675ed590b88e54362bd
Author: Alan Cox 
AuthorDate: 2022-06-14 19:01:36 +
Commit: Alan Cox 
CommitDate: 2022-06-17 06:06:52 +

iommu_gas: Eliminate unnecessary wrappers

Eliminate trivial wrappers for several iommu_gas functions that serve
no functional purpose.

Reviewed by:br, dougm, kib
MFC after:  3 weeks
Differential Revision:  https://reviews.freebsd.org/D35487
---
 sys/dev/iommu/busdma_iommu.c |  8 
 sys/dev/iommu/iommu.h|  8 
 sys/dev/iommu/iommu_gas.c| 41 -
 3 files changed, 4 insertions(+), 53 deletions(-)

diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index e06d96dad027..ae8c98922e41 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -594,7 +594,7 @@ iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu 
*tag,
if (seg + 1 < tag->common.nsegments)
gas_flags |= IOMMU_MF_CANSPLIT;
 
-   error = iommu_map(domain, &tag->common, buflen1,
+   error = iommu_gas_map(domain, &tag->common, buflen1,
offset, e_flags, gas_flags, ma + idx, &entry);
if (error != 0)
break;
@@ -1046,7 +1046,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t 
map1,
map = (struct bus_dmamap_iommu *)map1;
waitok = (flags & BUS_DMA_NOWAIT) != 0;
 
-   entry = iommu_map_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
+   entry = iommu_gas_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
if (entry == NULL)
return (ENOMEM);
entry->start = start;
@@ -1054,14 +1054,14 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, 
bus_dmamap_t map1,
ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
M_WAITOK : M_NOWAIT);
if (ma == NULL) {
-   iommu_map_free_entry(domain, entry);
+   iommu_gas_free_entry(domain, entry);
return (ENOMEM);
}
for (i = 0; i < atop(length); i++) {
ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
VM_MEMATTR_DEFAULT);
}
-   error = iommu_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
+   error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE),
waitok ? IOMMU_MF_CANWAIT : 0, ma);
if (error == 0) {
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index ee1149e6ea8f..3800213a1d64 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -162,14 +162,6 @@ struct iommu_ctx *iommu_instantiate_ctx(struct iommu_unit 
*iommu,
 device_t iommu_get_requester(device_t dev, uint16_t *rid);
 int iommu_init_busdma(struct iommu_unit *unit);
 void iommu_fini_busdma(struct iommu_unit *unit);
-struct iommu_map_entry *iommu_map_alloc_entry(struct iommu_domain *iodom,
-u_int flags);
-void iommu_map_free_entry(struct iommu_domain *, struct iommu_map_entry *);
-int iommu_map(struct iommu_domain *iodom,
-const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
-u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res);
-int iommu_map_region(struct iommu_domain *domain,
-struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma);
 
 void iommu_gas_init_domain(struct iommu_domain *domain);
 void iommu_gas_fini_domain(struct iommu_domain *domain);
diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index 27954de9db39..a65bb23e87c5 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -799,36 +799,6 @@ iommu_gas_reserve_region_extend(struct iommu_domain 
*domain,
return (error);
 }
 
-struct iommu_map_entry *
-iommu_map_alloc_entry(struct iommu_domain *domain, u_int flags)
-{
-   struct iommu_map_entry *res;
-
-   res = iommu_gas_alloc_entry(domain, flags);
-
-   return (res);
-}
-
-void
-iommu_map_free_entry(struct iommu_domain *domain, struct iommu_map_entry 
*entry)
-{
-
-   iommu_gas_free_entry(domain, entry);
-}
-
-int
-iommu_map(struct iommu_domain *domain,
-const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
-u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
-{
-   int error;
-
-   error = iommu_gas_map(domain, common, size, offset, eflags, flags,
-   ma, res);
-
-   return (error);
-}
-
 void
 iommu_unmap_msi(struct iommu_ctx *ctx)
 {
@@ -917,17 +887,6 @@ iommu_translate_msi(struct iommu_domain *domain, uint64_t 
*addr)
__func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
 }
 
-int
-iommu

git: e6775534aee1 - main - iommu_gas: Correct a broken KASSERT

2022-06-17 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e6775534aee1963a39e5ee762b8eab1d7dfb1b6b

commit e6775534aee1963a39e5ee762b8eab1d7dfb1b6b
Author: Alan Cox 
AuthorDate: 2022-06-17 17:03:06 +
Commit: Alan Cox 
CommitDate: 2022-06-17 18:05:20 +

iommu_gas: Correct a broken KASSERT

If iommu_gas_find_space() ever called iommu_gas_uppermatch(), and it
succeeded in allocating space, then the subsequent KASSERT would be
triggered.  Change that KASSERT to accept either success or ENOMEM.

MFC after:  1 week
---
 sys/dev/iommu/iommu_gas.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index a65bb23e87c5..073b5626edf6 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -504,7 +504,7 @@ iommu_gas_find_space(struct iommu_domain *domain,
if (common->highaddr >= domain->end)
return (ENOMEM);
error = iommu_gas_uppermatch(&a, RB_ROOT(&domain->rb_root));
-   KASSERT(error == ENOMEM,
+   KASSERT(error == 0 || error == ENOMEM,
("error %d from iommu_gas_uppermatch", error));
return (error);
 }



git: 164491fb0387 - main - iommu_gas: Eliminate a stale comment

2022-06-20 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=164491fb038724405b9e38355b569ef8dd4ad068

commit 164491fb038724405b9e38355b569ef8dd4ad068
Author: Alan Cox 
AuthorDate: 2022-06-20 17:14:53 +
Commit: Alan Cox 
CommitDate: 2022-06-20 17:30:36 +

iommu_gas: Eliminate a stale comment

As of 19bb5a7244ff, the size passed to iommu_gas_map is no longer
required to be a multiple of the CPU page size.

MFC after:  2 weeks
---
 sys/dev/iommu/iommu_gas.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index 073b5626edf6..f25519552d1c 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -358,8 +358,7 @@ iommu_gas_match_insert(struct iommu_gas_match_args *a)
 
/*
 * The prev->end is always aligned on the page size, which
-* causes page alignment for the entry->start too.  The size
-* is checked to be multiple of the page size.
+* causes page alignment for the entry->start too.
 *
 * The page sized gap is created between consequent
 * allocations to ensure that out-of-bounds accesses fault.



git: 32e82bcc1527 - main - busdma_iommu: Eliminate a redundant trunc_page()

2022-06-20 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=32e82bcc152783dfe0d03ffdd864cedfddbc80d7

commit 32e82bcc152783dfe0d03ffdd864cedfddbc80d7
Author: Alan Cox 
AuthorDate: 2022-06-20 18:40:42 +
Commit: Alan Cox 
CommitDate: 2022-06-21 04:03:04 +

busdma_iommu: Eliminate a redundant trunc_page()

Since OFF_TO_IDX() inherently truncates the given value, there is no
need to perform trunc_page() on it.

MFC after:  2 weeks
---
 sys/dev/iommu/busdma_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index ae8c98922e41..42fc0b6c5451 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -634,7 +634,7 @@ iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu 
*tag,
segs[seg].ds_addr = entry->start + offset;
segs[seg].ds_len = buflen1;
 
-   idx += OFF_TO_IDX(trunc_page(offset + buflen1));
+   idx += OFF_TO_IDX(offset + buflen1);
offset += buflen1;
offset &= IOMMU_PAGE_MASK;
buflen -= buflen1;
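
A standalone worked example of why the truncation is redundant; the macro
definitions below are simplified stand-ins for the kernel's, assuming a
4 KB page size.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_MASK       (PAGE_SIZE - 1)
#define trunc_page(x)   ((x) & ~PAGE_MASK)
#define OFF_TO_IDX(x)   ((x) >> PAGE_SHIFT)

int
main(void)
{
        uint64_t off = 0x12345;         /* arbitrary, non page-aligned */

        /* The right shift already discards the low PAGE_SHIFT bits. */
        printf("%ju %ju\n", (uintmax_t)OFF_TO_IDX(off),
            (uintmax_t)OFF_TO_IDX(trunc_page(off)));    /* both print 18 */
        return (0);
}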



git: 0ba1d8608234 - main - iommu_gas: Add a missing error-case unlock

2022-06-20 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=0ba1d8608234eee767b475627da6e5903ce7536a

commit 0ba1d8608234eee767b475627da6e5903ce7536a
Author: Alan Cox 
AuthorDate: 2022-06-21 04:48:31 +
Commit: Alan Cox 
CommitDate: 2022-06-21 04:48:31 +

iommu_gas: Add a missing error-case unlock

Release the domain lock when iommu_gas_reserve_region_extend()'s call to
iommu_gas_reserve_region_locked() fails.

MFC after:  2 weeks
---
 sys/dev/iommu/iommu_gas.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index 70eef9a0a1f7..a9c4caa30dd8 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -781,8 +781,10 @@ iommu_gas_reserve_region_extend(struct iommu_domain 
*domain,
if (entry_start != entry_end) {
error = iommu_gas_reserve_region_locked(domain,
entry_start, entry_end, entry);
-   if (error != 0)
+   if (error != 0) {
+   IOMMU_DOMAIN_UNLOCK(domain);
break;
+   }
entry = NULL;
}
IOMMU_DOMAIN_UNLOCK(domain);



git: eeb46578c21a - main - busdma_iommu: Fine-grained locking for the dmamap's map list

2022-06-24 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=eeb46578c21ad37866f49f3bbb3ac738b44abbf6

commit eeb46578c21ad37866f49f3bbb3ac738b44abbf6
Author: Alan Cox 
AuthorDate: 2022-06-22 21:51:47 +
Commit: Alan Cox 
CommitDate: 2022-06-25 05:59:23 +

busdma_iommu: Fine-grained locking for the dmamap's map list

Introduce fine-grained locking on the dmamap's list of map entries,
replacing the use of the domain lock.  This is not the most significant
source of lock contention, but it is the easiest to address.

Reviewed by:kib
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D35557
---
 sys/dev/iommu/busdma_iommu.c | 56 +---
 sys/dev/iommu/busdma_iommu.h |  7 ++
 2 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 42fc0b6c5451..69cf9dd12e7e 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -458,6 +458,7 @@ iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, 
bus_dmamap_t *mapp)
return (ENOMEM);
}
}
+   IOMMU_DMAMAP_INIT(map);
TAILQ_INIT(&map->map_entries);
map->tag = tag;
map->locked = true;
@@ -473,18 +474,16 @@ iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t 
map1)
 {
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
-   struct iommu_domain *domain;
 
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
if (map != NULL) {
-   domain = tag->ctx->domain;
-   IOMMU_DOMAIN_LOCK(domain);
+   IOMMU_DMAMAP_LOCK(map);
if (!TAILQ_EMPTY(&map->map_entries)) {
-   IOMMU_DOMAIN_UNLOCK(domain);
+   IOMMU_DMAMAP_UNLOCK(map);
return (EBUSY);
}
-   IOMMU_DOMAIN_UNLOCK(domain);
+   IOMMU_DMAMAP_DESTROY(map);
free(map, M_IOMMU_DMAMAP);
}
tag->map_count--;
@@ -625,10 +624,11 @@ iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu 
*tag,
(uintmax_t)entry->start, (uintmax_t)entry->end,
(uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));
 
-   IOMMU_DOMAIN_LOCK(domain);
+   KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
+   ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
+   IOMMU_DMAMAP_LOCK(map);
TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
-   entry->flags |= IOMMU_MAP_ENTRY_MAP;
-   IOMMU_DOMAIN_UNLOCK(domain);
+   IOMMU_DMAMAP_UNLOCK(map);
TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);
 
segs[seg].ds_addr = entry->start + offset;
@@ -651,8 +651,8 @@ iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu 
*tag,
 {
struct iommu_ctx *ctx;
struct iommu_domain *domain;
-   struct iommu_map_entry *entry, *entry1;
-   struct iommu_map_entries_tailq unroll_list;
+   struct iommu_map_entry *entry;
+   struct iommu_map_entries_tailq entries, unroll_list;
int error;
 
ctx = tag->ctx;
@@ -662,15 +662,15 @@ iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu 
*tag,
TAILQ_INIT(&unroll_list);
error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
buflen, flags, segs, segp, &unroll_list);
-   if (error != 0) {
+   if (error != 0 && !TAILQ_EMPTY(&unroll_list)) {
/*
 * The busdma interface does not allow us to report
 * partial buffer load, so unfortunately we have to
 * revert all work done.
 */
-   IOMMU_DOMAIN_LOCK(domain);
-   TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
-   entry1) {
+   TAILQ_INIT(&entries);
+   IOMMU_DMAMAP_LOCK(map);
+   TAILQ_FOREACH(entry, &unroll_list, unroll_link) {
/*
 * No entries other than what we have created
 * during the failed run might have been
@@ -678,10 +678,11 @@ iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu 
*tag,
 * pglock.
 */
TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
-   TAILQ_REMOVE(&unroll_list, entry, unroll_link);
-   TAILQ_INSERT_TAIL(&domain->unload_entries, entry,
-   dmamap_link);
+   TAILQ_INSERT_TAIL(&entries, entry, dmamap_lin
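
For orientation, one plausible shape of the new per-map lock; only the
IOMMU_DMAMAP_* macro names appear in the diff, so the mutex field name
"lock" and the struct layout here are assumptions, not the committed
definitions.

struct bus_dmamap_iommu {
        struct bus_dma_tag_iommu *tag;
        struct mtx lock;                        /* protects map_entries */
        struct iommu_map_entries_tailq map_entries;
        /* ... */
};

#define IOMMU_DMAMAP_INIT(map)          mtx_init(&(map)->lock,          \
            "iommu dmamap", NULL, MTX_DEF)
#define IOMMU_DMAMAP_DESTROY(map)       mtx_destroy(&(map)->lock)
#define IOMMU_DMAMAP_LOCK(map)          mtx_lock(&(map)->lock)
#define IOMMU_DMAMAP_UNLOCK(map)        mtx_unlock(&(map)->lock)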

git: da33f6d76b9d - main - iommu_gas: Tidy up

2022-06-25 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=da33f6d76b9d0c00649c692f6cb2b3b33bf5af30

commit da33f6d76b9d0c00649c692f6cb2b3b33bf5af30
Author: Alan Cox 
AuthorDate: 2022-06-26 05:44:47 +
Commit: Alan Cox 
CommitDate: 2022-06-26 06:01:49 +

iommu_gas: Tidy up

Move a comment to the code that it describes.  Improve the wording.

Style fixes.

MFC after:  2 weeks
---
 sys/dev/iommu/iommu_gas.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index a9c4caa30dd8..2647c2ce6612 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -197,8 +197,7 @@ iommu_gas_rb_insert(struct iommu_domain *domain, struct 
iommu_map_entry *entry)
 {
struct iommu_map_entry *found;
 
-   found = RB_INSERT(iommu_gas_entries_tree,
-   &domain->rb_root, entry);
+   found = RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, entry);
return (found == NULL);
 }
 
@@ -303,6 +302,13 @@ iommu_gas_match_one(struct iommu_gas_match_args *a, 
iommu_gaddr_t beg,
 {
iommu_gaddr_t bs, start;
 
+   /*
+* The prev->end is always aligned on the page size, which
+* causes page alignment for the entry->start too.
+*
+* A page sized gap is created between consecutive
+* allocations to ensure that out-of-bounds accesses fault.
+*/
a->entry->start = roundup2(beg + IOMMU_PAGE_SIZE,
a->common->alignment);
if (a->entry->start + a->offset + a->size > maxaddr)
@@ -356,13 +362,6 @@ iommu_gas_match_insert(struct iommu_gas_match_args *a)
 {
bool found __diagused;
 
-   /*
-* The prev->end is always aligned on the page size, which
-* causes page alignment for the entry->start too.
-*
-* The page sized gap is created between consequent
-* allocations to ensure that out-of-bounds accesses fault.
-*/
a->entry->end = a->entry->start +
roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);
 
@@ -487,8 +486,7 @@ iommu_gas_find_space(struct iommu_domain *domain,
 
/* Handle lower region. */
if (common->lowaddr > 0) {
-   error = iommu_gas_lowermatch(&a,
-   RB_ROOT(&domain->rb_root));
+   error = iommu_gas_lowermatch(&a, RB_ROOT(&domain->rb_root));
if (error == 0)
return (0);
KASSERT(error == ENOMEM,
@@ -636,7 +634,7 @@ iommu_gas_map(struct iommu_domain *domain,
("invalid flags 0x%x", flags));
 
entry = iommu_gas_alloc_entry(domain,
-   (flags & IOMMU_MF_CANWAIT) != 0 ?  IOMMU_PGF_WAITOK : 0);
+   (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
if (entry == NULL)
return (ENOMEM);
IOMMU_DOMAIN_LOCK(domain);
@@ -660,7 +658,7 @@ iommu_gas_map(struct iommu_domain *domain,
 
error = domain->ops->map(domain, entry->start,
entry->end - entry->start, ma, eflags,
-   ((flags & IOMMU_MF_CANWAIT) != 0 ?  IOMMU_PGF_WAITOK : 0));
+   ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
if (error == ENOMEM) {
iommu_domain_unload_entry(entry, true);
return (error);



git: 7b39a9bc1df3 - main - iommu_gas: Fix a recent regression with IOMMU_MF_CANSPLIT

2022-06-26 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7b39a9bc1df37502e8186593f3427b7ff0e4cc71

commit 7b39a9bc1df37502e8186593f3427b7ff0e4cc71
Author: Alan Cox 
AuthorDate: 2022-06-26 16:48:12 +
Commit: Alan Cox 
CommitDate: 2022-06-26 21:31:54 +

iommu_gas: Fix a recent regression with IOMMU_MF_CANSPLIT

As of 19bb5a7244ff, the IOMMU_MF_CANSPLIT case in iommu_gas_match_one()
must take into account the specified offset.  Otherwise, the recently
changed end calculation in iommu_gas_match_insert() could produce an
end address that crosses the specified boundary by one page.

Reviewed by:dougm
MFC with:   19bb5a7244ff
---
 sys/dev/iommu/iommu_gas.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index 2647c2ce6612..bb6cde2721a6 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -350,7 +350,7 @@ iommu_gas_match_one(struct iommu_gas_match_args *a, 
iommu_gaddr_t beg,
 * the next entry, then we do not have gap.  Ignore for now.
 */
if ((a->gas_flags & IOMMU_MF_CANSPLIT) != 0) {
-   a->size = bs - a->entry->start;
+   a->size = bs - a->entry->start - a->offset;
return (true);
}
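
A worked example of the off-by-one-page case being fixed; the addresses and
offset below are arbitrary, and roundup2() is simplified to a power-of-two
version of the kernel macro.

#include <stdint.h>
#include <stdio.h>

#define IOMMU_PAGE_SIZE 4096
#define roundup2(x, y)  (((x) + ((y) - 1)) & ~(uint64_t)((y) - 1))

int
main(void)
{
        uint64_t start = 0x10000, bs = 0x20000; /* bs: the boundary */
        uint64_t offset = 0x800;                /* sub-page offset */
        uint64_t size_old = bs - start;          /* before the fix */
        uint64_t size_new = bs - start - offset; /* after the fix */

        /* end is computed as in iommu_gas_match_insert(). */
        printf("old end %#jx, boundary %#jx\n", (uintmax_t)(start +
            roundup2(size_old + offset, IOMMU_PAGE_SIZE)), (uintmax_t)bs);
        printf("new end %#jx\n", (uintmax_t)(start +
            roundup2(size_new + offset, IOMMU_PAGE_SIZE)));
        return (0);
}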
 



git: db0110a536bf - main - iommu: Shrink the iommu map entry structure

2022-07-15 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=db0110a536bf70c1ff55f3b3f46a0b5a9af46058

commit db0110a536bf70c1ff55f3b3f46a0b5a9af46058
Author: Alan Cox 
AuthorDate: 2022-07-11 03:52:52 +
Commit: Alan Cox 
CommitDate: 2022-07-16 03:24:52 +

iommu: Shrink the iommu map entry structure

Eliminate the unroll_link field from struct iommu_map_entry, shrinking
the struct by 16 bytes on 64-bit architectures.

Reviewed by:kib
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D35769
---
 sys/dev/iommu/busdma_iommu.c | 33 ++---
 sys/dev/iommu/iommu.h|  2 --
 sys/x86/iommu/intel_ctx.c|  4 ++--
 sys/x86/iommu/intel_drv.c|  2 +-
 4 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 69cf9dd12e7e..10e7476b35eb 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -558,7 +558,7 @@ static int
 iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
 struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
 int flags, bus_dma_segment_t *segs, int *segp,
-struct iommu_map_entries_tailq *unroll_list)
+struct iommu_map_entries_tailq *entries)
 {
struct iommu_ctx *ctx;
struct iommu_domain *domain;
@@ -626,10 +626,7 @@ iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu 
*tag,
 
KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
-   IOMMU_DMAMAP_LOCK(map);
-   TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
-   IOMMU_DMAMAP_UNLOCK(map);
-   TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);
+   TAILQ_INSERT_TAIL(entries, entry, dmamap_link);
 
segs[seg].ds_addr = entry->start + offset;
segs[seg].ds_len = buflen1;
@@ -651,36 +648,26 @@ iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu 
*tag,
 {
struct iommu_ctx *ctx;
struct iommu_domain *domain;
-   struct iommu_map_entry *entry;
-   struct iommu_map_entries_tailq entries, unroll_list;
+   struct iommu_map_entries_tailq entries;
int error;
 
ctx = tag->ctx;
domain = ctx->domain;
atomic_add_long(&ctx->loads, 1);
 
-   TAILQ_INIT(&unroll_list);
+   TAILQ_INIT(&entries);
error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
-   buflen, flags, segs, segp, &unroll_list);
-   if (error != 0 && !TAILQ_EMPTY(&unroll_list)) {
+   buflen, flags, segs, segp, &entries);
+   if (error == 0) {
+   IOMMU_DMAMAP_LOCK(map);
+   TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
+   IOMMU_DMAMAP_UNLOCK(map);
+   } else if (!TAILQ_EMPTY(&entries)) {
/*
 * The busdma interface does not allow us to report
 * partial buffer load, so unfortunately we have to
 * revert all work done.
 */
-   TAILQ_INIT(&entries);
-   IOMMU_DMAMAP_LOCK(map);
-   TAILQ_FOREACH(entry, &unroll_list, unroll_link) {
-   /*
-* No entries other than what we have created
-* during the failed run might have been
-* inserted there in between, since we own ctx
-* pglock.
-*/
-   TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
-   TAILQ_INSERT_TAIL(&entries, entry, dmamap_link);
-   }
-   IOMMU_DMAMAP_UNLOCK(map);
IOMMU_DOMAIN_LOCK(domain);
TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
IOMMU_DOMAIN_UNLOCK(domain);
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index 3800213a1d64..62b5659b6e83 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -58,8 +58,6 @@ struct iommu_map_entry {
u_int flags;
TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* Link for dmamap entries */
RB_ENTRY(iommu_map_entry) rb_entry;  /* Links for domain entries */
-   TAILQ_ENTRY(iommu_map_entry) unroll_link; /* Link for unroll after
-   dmamap_load failure */
struct iommu_domain *domain;
struct iommu_qi_genseq gseq;
 };
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index 815dc6146b00..79e2a15d80c7 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -245,7 +245,7 @@ domain_init_rmrr(struct dmar_domain *domain, device_t dev,
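
The 16-byte figure follows from a TAILQ_ENTRY being a pair of pointers; a
trivial standalone check (the struct below exists only for the sizeof):

#include <stdio.h>
#include <sys/queue.h>

struct iommu_map_entry;                 /* only pointers are needed */

struct link_size_demo {
        TAILQ_ENTRY(iommu_map_entry) unroll_link;
};

int
main(void)
{
        /* Prints 16 on LP64, the per-entry savings from this change. */
        printf("%zu\n", sizeof(struct link_size_demo));
        return (0);
}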

git: da55f86c6146 - main - x86/iommu: Eliminate redundant wrappers

2022-07-16 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=da55f86c61462b119fd1306d12411989d6610650

commit da55f86c61462b119fd1306d12411989d6610650
Author: Alan Cox 
AuthorDate: 2022-07-16 04:25:11 +
Commit: Alan Cox 
CommitDate: 2022-07-16 23:05:37 +

x86/iommu: Eliminate redundant wrappers

Reviewed by:kib
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D35832
---
 sys/x86/iommu/intel_ctx.c  | 26 --
 sys/x86/iommu/intel_dmar.h |  3 ---
 2 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index 79e2a15d80c7..bfc607674b57 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -868,7 +868,7 @@ dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
 }
 
 void
-dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free)
+iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free)
 {
struct dmar_domain *domain;
struct dmar_unit *unit;
@@ -902,15 +902,15 @@ dmar_domain_unload_emit_wait(struct dmar_domain *domain,
 }
 
 void
-dmar_domain_unload(struct dmar_domain *domain,
+iommu_domain_unload(struct iommu_domain *iodom,
 struct iommu_map_entries_tailq *entries, bool cansleep)
 {
+   struct dmar_domain *domain;
struct dmar_unit *unit;
-   struct iommu_domain *iodom;
struct iommu_map_entry *entry, *entry1;
int error __diagused;
 
-   iodom = DOM2IODOM(domain);
+   domain = IODOM2DOM(iodom);
unit = DOM2DMAR(domain);
 
TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
@@ -975,21 +975,3 @@ iommu_free_ctx(struct iommu_ctx *context)
 
dmar_free_ctx(ctx);
 }
-
-void
-iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free)
-{
-
-   dmar_domain_unload_entry(entry, free);
-}
-
-void
-iommu_domain_unload(struct iommu_domain *iodom,
-struct iommu_map_entries_tailq *entries, bool cansleep)
-{
-   struct dmar_domain *domain;
-
-   domain = IODOM2DOM(iodom);
-
-   dmar_domain_unload(domain, entries, cansleep);
-}
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index b34505a4e5d0..05793ed9f238 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -277,9 +277,6 @@ int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx);
 void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx);
 void dmar_free_ctx(struct dmar_ctx *ctx);
 struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid);
-void dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free);
-void dmar_domain_unload(struct dmar_domain *domain,
-struct iommu_map_entries_tailq *entries, bool cansleep);
 void dmar_domain_free_entry(struct iommu_map_entry *entry, bool free);
 
 void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain,



git: 4eaaacc75535 - main - x86/iommu: Shrink the critical section in dmar_qi_task()

2022-07-18 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4eaaacc75535befdb9894cca4e0d8da376328fa4

commit 4eaaacc75535befdb9894cca4e0d8da376328fa4
Author: Alan Cox 
AuthorDate: 2022-07-18 00:56:39 +
Commit: Alan Cox 
CommitDate: 2022-07-19 03:23:13 +

x86/iommu: Shrink the critical section in dmar_qi_task()

It is safe to test and clear the Invalidation Wait Descriptor
Complete flag before acquiring the DMAR lock in dmar_qi_task(),
rather than waiting until the lock is held.

Reviewed by:kib
MFC after:  2 weeks
---
 sys/x86/iommu/intel_qi.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
index 894e3d537ac7..ca58715a227c 100644
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -343,6 +343,16 @@ dmar_qi_task(void *arg, int pending __unused)
 
unit = arg;
 
+   /*
+* Request an interrupt on the completion of the next invalidation
+* wait descriptor with the IF field set.
+*/
+   ics = dmar_read4(unit, DMAR_ICS_REG);
+   if ((ics & DMAR_ICS_IWC) != 0) {
+   ics = DMAR_ICS_IWC;
+   dmar_write4(unit, DMAR_ICS_REG, ics);
+   }
+
DMAR_LOCK(unit);
for (;;) {
entry = TAILQ_FIRST(&unit->tlb_flush_entries);
@@ -356,11 +366,6 @@ dmar_qi_task(void *arg, int pending __unused)
IOMMU_MAP_ENTRY_QI_NF) == 0);
DMAR_LOCK(unit);
}
-   ics = dmar_read4(unit, DMAR_ICS_REG);
-   if ((ics & DMAR_ICS_IWC) != 0) {
-   ics = DMAR_ICS_IWC;
-   dmar_write4(unit, DMAR_ICS_REG, ics);
-   }
if (unit->inv_seq_waiters > 0)
wakeup(&unit->inv_seq_waiters);
DMAR_UNLOCK(unit);



git: 54291f7d6506 - main - swap_pager: Reduce the scope of the object lock in putpages

2022-07-18 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=54291f7d6506e6c6087433c5bbdb2224b6cef23b

commit 54291f7d6506e6c6087433c5bbdb2224b6cef23b
Author: Alan Cox 
AuthorDate: 2022-07-19 03:28:07 +
Commit: Alan Cox 
CommitDate: 2022-07-19 03:35:49 +

swap_pager: Reduce the scope of the object lock in putpages

We don't need to hold the object lock while allocating swap space, so
don't.

Reviewed by:dougm, kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D35839
---
 sys/vm/swap_pager.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index c20360975c4b..67cc3bf017d2 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1506,10 +1506,8 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count,
}
 
/* Get a block of swap of size up to size n. */
-   VM_OBJECT_WLOCK(object);
blk = swp_pager_getswapspace(&n);
if (blk == SWAPBLK_NONE) {
-   VM_OBJECT_WUNLOCK(object);
mtx_lock(&swbuf_mtx);
if (++nsw_wcount_async == 1)
wakeup(&nsw_wcount_async);
@@ -1518,6 +1516,7 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count,
rtvals[i + j] = VM_PAGER_FAIL;
continue;
}
+   VM_OBJECT_WLOCK(object);
for (j = 0; j < n; ++j) {
mreq = ma[i + j];
vm_page_aflag_clear(mreq, PGA_SWAP_FREE);



git: dfabdacb279c - main - iommu_gas: Avoid double unmapping on error

2022-07-21 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=dfabdacb279ca603d008a0e7e952c5c59ac51da4

commit dfabdacb279ca603d008a0e7e952c5c59ac51da4
Author: Alan Cox 
AuthorDate: 2022-07-21 06:53:54 +
Commit: Alan Cox 
CommitDate: 2022-07-21 07:00:46 +

iommu_gas: Avoid double unmapping on error

In the extremely unlikely case that the iommu_gas_map_region() call in
bus_dma_iommu_load_ident() failed, we would attempt to unmap the failed
entry twice, first in iommu_gas_map_region(), and a second time in the
caller.  Once is enough, and twice is problematic because it leads to a
second RB_REMOVE call on the same tree node.  Like it or not, RB_TREE
does not handle that possibility.

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D35869
---
 sys/dev/iommu/busdma_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 10e7476b35eb..67e82fe43e58 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -1055,7 +1055,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
IOMMU_DMAMAP_UNLOCK(map);
} else {
-   iommu_domain_unload_entry(entry, true);
+   iommu_gas_free_entry(domain, entry);
}
for (i = 0; i < atop(length); i++)
vm_page_putfake(ma[i]);



git: 8bc367384745 - main - iommu_gas: Eliminate a possible case of use-after-free

2022-07-25 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=8bc3673847453ca51237b5c85fe57f3f02e17a4b

commit 8bc3673847453ca51237b5c85fe57f3f02e17a4b
Author: Alan Cox 
AuthorDate: 2022-07-22 17:00:26 +
Commit: Alan Cox 
CommitDate: 2022-07-25 16:14:58 +

iommu_gas: Eliminate a possible case of use-after-free

Eliminate a possible case of use-after-free in an error handling path
after a mapping failure.  Specifically, eliminate IOMMU_MAP_ENTRY_QI_NF
and instead perform the IOTLB invalidation synchronously.  Otherwise,
when iommu_domain_unload_entry() is called and told not to free the
IOMMU map entry, the caller could free the entry before dmar_qi_task()
is finished with it.

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D35878
---
 sys/arm64/iommu/iommu.c|  3 ++-
 sys/dev/iommu/iommu.h  |  3 ++-
 sys/dev/iommu/iommu_gas.c  |  6 --
 sys/dev/iommu/iommu_gas.h  |  1 -
 sys/x86/iommu/intel_ctx.c  | 28 +++-
 sys/x86/iommu/intel_dmar.h |  2 ++
 sys/x86/iommu/intel_qi.c   | 14 --
 7 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/sys/arm64/iommu/iommu.c b/sys/arm64/iommu/iommu.c
index aa48dcf5ab5e..0080ab4ff316 100644
--- a/sys/arm64/iommu/iommu.c
+++ b/sys/arm64/iommu/iommu.c
@@ -509,7 +509,8 @@ iommu_find(device_t dev, bool verbose)
 }
 
 void
-iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free)
+iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+bool cansleep __unused)
 {
 
dprintf("%s\n", __func__);
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index 62b5659b6e83..65fefe3ada7b 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -151,7 +151,8 @@ void iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *ctx);
 struct iommu_ctx *iommu_get_ctx(struct iommu_unit *, device_t dev,
 uint16_t rid, bool id_mapped, bool rmrr_init);
 struct iommu_unit *iommu_find(device_t dev, bool verbose);
-void iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free);
+void iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+bool cansleep);
 void iommu_domain_unload(struct iommu_domain *domain,
 struct iommu_map_entries_tailq *entries, bool cansleep);
 
diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index 86dc919e4572..ec456e2ec48b 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -638,7 +638,8 @@ iommu_gas_map(struct iommu_domain *domain,
entry->end - entry->start, ma, eflags,
((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
if (error == ENOMEM) {
-   iommu_domain_unload_entry(entry, true);
+   iommu_domain_unload_entry(entry, true,
+   (flags & IOMMU_MF_CANWAIT) != 0);
return (error);
}
KASSERT(error == 0,
@@ -676,7 +677,8 @@ iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
if (error == ENOMEM) {
-   iommu_domain_unload_entry(entry, false);
+   iommu_domain_unload_entry(entry, false,
+   (flags & IOMMU_MF_CANWAIT) != 0);
return (error);
}
KASSERT(error == 0,
diff --git a/sys/dev/iommu/iommu_gas.h b/sys/dev/iommu/iommu_gas.h
index c32a098538b0..a9d0df5f272f 100644
--- a/sys/dev/iommu/iommu_gas.h
+++ b/sys/dev/iommu/iommu_gas.h
@@ -50,7 +50,6 @@
 #defineIOMMU_MAP_ENTRY_MAP 0x0004  /* Busdma created, linked by
   dmamap_link */
 #defineIOMMU_MAP_ENTRY_UNMAPPED0x0010  /* No backing pages */
-#defineIOMMU_MAP_ENTRY_QI_NF   0x0020  /* qi task, do not free entry */
 #defineIOMMU_MAP_ENTRY_READ0x1000  /* Read permitted */
 #defineIOMMU_MAP_ENTRY_WRITE   0x2000  /* Write permitted */
 #defineIOMMU_MAP_ENTRY_SNOOP   0x4000  /* Snoop */
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index bfc607674b57..5e13f020264b 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -868,25 +868,35 @@ dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
 }
 
 void
-iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free)
+iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
+bool cansleep)
 {
struct dmar_domain *domain;
struct dmar_unit *unit;
 
domain = IODOM2DOM(entry->domain);
unit = DOM2DMAR(domain);
+
+   /*
+* If "free" is false, then the IOTLB invalidation must be performed
+* synchronously.  Otherwise, the caller mi

git: c25156347083 - main - x86/iommu: Correct a recent change to iommu_domain_unload_entry()

2022-07-25 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=c251563470831c34cf53242936425a0d4d995edf

commit c251563470831c34cf53242936425a0d4d995edf
Author: Alan Cox 
AuthorDate: 2022-07-26 04:53:15 +
Commit: Alan Cox 
CommitDate: 2022-07-26 06:07:21 +

x86/iommu: Correct a recent change to iommu_domain_unload_entry()

Correct 8bc367384745.  When iommu_domain_unload_entry() performs a
synchronous IOTLB invalidation, it must call dmar_domain_free_entry()
to remove the entry from the domain's RB_TREE.

Push down the acquisition and release of the DMAR lock into the
recently introduced function dmar_qi_invalidate_sync_locked() and
remove the _locked suffix.

MFC with:   8bc367384745
---
 sys/x86/iommu/intel_ctx.c  | 7 ---
 sys/x86/iommu/intel_dmar.h | 4 ++--
 sys/x86/iommu/intel_qi.c   | 9 ++---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index 5e13f020264b..936cf8bb7632 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -883,17 +883,18 @@ iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
 * dmar_qi_task() is finished processing it.
 */
if (unit->qi_enabled) {
-   DMAR_LOCK(unit);
if (free) {
+   DMAR_LOCK(unit);
dmar_qi_invalidate_locked(domain, entry->start,
entry->end - entry->start, &entry->gseq, true);
TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry,
dmamap_link);
+   DMAR_UNLOCK(unit);
} else {
-   dmar_qi_invalidate_sync_locked(domain, entry->start,
+   dmar_qi_invalidate_sync(domain, entry->start,
entry->end - entry->start, cansleep);
+   dmar_domain_free_entry(entry, false);
}
-   DMAR_UNLOCK(unit);
} else {
domain_flush_iotlb_sync(domain, entry->start, entry->end -
entry->start);
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index 0f811d760bb7..06cecdf704ff 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -251,8 +251,8 @@ int dmar_init_qi(struct dmar_unit *unit);
 void dmar_fini_qi(struct dmar_unit *unit);
 void dmar_qi_invalidate_locked(struct dmar_domain *domain, iommu_gaddr_t start,
 iommu_gaddr_t size, struct iommu_qi_genseq *psec, bool emit_wait);
-void dmar_qi_invalidate_sync_locked(struct dmar_domain *domain,
-iommu_gaddr_t start, iommu_gaddr_t size, bool cansleep);
+void dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t start,
+iommu_gaddr_t size, bool cansleep);
 void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit);
 void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit);
 void dmar_qi_invalidate_iec_glob(struct dmar_unit *unit);
diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
index 174cf9ea19a8..32f01a2787b0 100644
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -243,14 +243,17 @@ dmar_qi_invalidate_locked(struct dmar_domain *domain, iommu_gaddr_t base,
 }
 
 void
-dmar_qi_invalidate_sync_locked(struct dmar_domain *domain, iommu_gaddr_t base,
+dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t base,
 iommu_gaddr_t size, bool cansleep)
 {
+   struct dmar_unit *unit;
struct iommu_qi_genseq gseq;
 
-   DMAR_ASSERT_LOCKED(domain->dmar);
+   unit = domain->dmar;
+   DMAR_LOCK(unit);
dmar_qi_invalidate_locked(domain, base, size, &gseq, true);
-   dmar_qi_wait_for_seq(domain->dmar, &gseq, !cansleep);
+   dmar_qi_wait_for_seq(unit, &gseq, !cansleep);
+   DMAR_UNLOCK(unit);
 }
 
 void



git: 42736dc44dd0 - main - x86/iommu: Reduce DMAR lock contention

2022-07-28 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=42736dc44dd0151546db3f2e145ae1cfd4546fe1

commit 42736dc44dd0151546db3f2e145ae1cfd4546fe1
Author: Alan Cox 
AuthorDate: 2022-07-26 06:04:54 +
Commit: Alan Cox 
CommitDate: 2022-07-29 05:11:33 +

x86/iommu: Reduce DMAR lock contention

Replace the DMAR unit's tlb_flush TAILQ by a custom list implementation
that enables dmar_qi_task() to dequeue entries without holding the DMAR
lock.

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D35951
---
 sys/dev/iommu/iommu.h  |   5 +-
 sys/dev/iommu/iommu_gas.c  |   5 +-
 sys/x86/iommu/intel_ctx.c  |  16 +++
 sys/x86/iommu/intel_dmar.h |  33 +++--
 sys/x86/iommu/intel_qi.c   | 113 ++---
 5 files changed, 140 insertions(+), 32 deletions(-)

diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index 65fefe3ada7b..fefd0f615be5 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -56,7 +56,10 @@ struct iommu_map_entry {
iommu_gaddr_t free_down;/* Max free space below the
   current R/B tree node */
u_int flags;
-   TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* Link for dmamap entries */
+   union {
+   TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* DMA map entries */
+   struct iommu_map_entry *tlb_flush_next;
+   };
RB_ENTRY(iommu_map_entry) rb_entry;  /* Links for domain entries */
struct iommu_domain *domain;
struct iommu_qi_genseq gseq;
diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index ec456e2ec48b..bac15edcf849 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -99,7 +99,7 @@ iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
 
res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
-   if (res != NULL) {
+   if (res != NULL && domain != NULL) {
res->domain = domain;
atomic_add_int(&domain->entries_cnt, 1);
}
@@ -113,7 +113,8 @@ iommu_gas_free_entry(struct iommu_domain *domain, struct iommu_map_entry *entry)
KASSERT(domain == entry->domain,
("mismatched free domain %p entry %p entry->domain %p", domain,
entry, entry->domain));
-   atomic_subtract_int(&domain->entries_cnt, 1);
+   if (domain != NULL)
+   atomic_subtract_int(&domain->entries_cnt, 1);
uma_zfree(iommu_map_entry_zone, entry);
 }
 
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index 936cf8bb7632..3bd425aeecbd 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -867,6 +867,10 @@ dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
entry->flags = 0;
 }
 
+/*
+ * If the given value for "free" is true, then the caller must not be using
+ * the entry's dmamap_link field.
+ */
 void
 iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
 bool cansleep)
@@ -885,10 +889,7 @@ iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
if (unit->qi_enabled) {
if (free) {
DMAR_LOCK(unit);
-   dmar_qi_invalidate_locked(domain, entry->start,
-   entry->end - entry->start, &entry->gseq, true);
-   TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry,
-   dmamap_link);
+   dmar_qi_invalidate_locked(domain, entry, true);
DMAR_UNLOCK(unit);
} else {
dmar_qi_invalidate_sync(domain, entry->start,
@@ -942,12 +943,11 @@ iommu_domain_unload(struct iommu_domain *iodom,
 
KASSERT(unit->qi_enabled, ("loaded entry left"));
DMAR_LOCK(unit);
-   TAILQ_FOREACH(entry, entries, dmamap_link) {
-   dmar_qi_invalidate_locked(domain, entry->start, entry->end -
-   entry->start, &entry->gseq,
+   while ((entry = TAILQ_FIRST(entries)) != NULL) {
+   TAILQ_REMOVE(entries, entry, dmamap_link);
+   dmar_qi_invalidate_locked(domain, entry,
dmar_domain_unload_emit_wait(domain, entry));
}
-   TAILQ_CONCAT(&unit->tlb_flush_entries, entries, dmamap_link);
DMAR_UNLOCK(unit);
 }
 
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index 06cecdf704ff..1234ee058ffd 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -177,8 +177,33 @@ struct dmar_unit {
u_int irte_cnt;
vmem_t *irtids;
 
-   /* Delayed freeing of
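
The diff is cut off above, but the technique in the commit message can be sketched.
What follows is a minimal, hypothetical model, not the committed code: the producer
appends entries to a singly-linked list behind a permanent dummy head while holding
the DMAR lock, publishing each link with a release store, and the consumer (the
invalidation task) advances past completed entries using acquire loads without ever
taking the DMAR lock to dequeue.  The struct, field, and function names below are
invented for illustration; atomic_store_rel_ptr() and atomic_load_acq_ptr() are the
real <machine/atomic.h> primitives, and "done" stands in for the real completion
check (dmar_qi_seq_processed()).

#include <sys/param.h>
#include <machine/atomic.h>

struct qent {
	struct qent	*next;	/* published to the consumer with a release store */
	int		 done;	/* stands in for dmar_qi_seq_processed() */
};

static struct qent	 dummy;			/* permanent dummy head */
static struct qent	*tlb_head = &dummy;	/* consumer-private cursor */
static struct qent	*tlb_tail = &dummy;	/* producer-private cursor */

/* Producer: called with the DMAR lock held, so appends are serialized. */
static void
enqueue_locked(struct qent *e)
{
	e->next = NULL;
	/* Release store: the entry's fields are visible before the link is. */
	atomic_store_rel_ptr((uintptr_t *)&tlb_tail->next, (uintptr_t)e);
	tlb_tail = e;
}

/* Consumer: runs without holding the DMAR lock. */
static void
drain(void)
{
	struct qent *e;

	for (;;) {
		/* Acquire load pairs with the producer's release store. */
		e = (struct qent *)atomic_load_acq_ptr(
		    (uintptr_t *)&tlb_head->next);
		if (e == NULL || !e->done)
			break;
		/* The node stepped past is no longer referenced and may be freed. */
		tlb_head = e;
	}
}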

git: 4670f90846d4 - main - iommu_gas: Eliminate redundant parameters and push down lock acquisition

2022-07-30 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4670f90846d49027bf23435a30895a74264f1e79

commit 4670f90846d49027bf23435a30895a74264f1e79
Author: Alan Cox 
AuthorDate: 2022-07-29 06:14:46 +
Commit: Alan Cox 
CommitDate: 2022-07-30 19:28:48 +

iommu_gas: Eliminate redundant parameters and push down lock acquisition

Since IOMMU map entries store a reference to the domain in which they
reside, there is no need to pass the domain to iommu_gas_free_entry(),
iommu_gas_free_space(), and iommu_gas_free_region().

Push down the acquisition and release of the IOMMU domain lock into
iommu_gas_free_space() and iommu_gas_free_region().

Both of these changes allow for simplifications in the callers of the
functions without really complicating the functions themselves.
Moreover, the latter change eliminates the direct use of the IOMMU
domain lock from the x86-specific DMAR code.

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D35995
---
 sys/arm64/iommu/iommu.c  | 10 ++
 sys/dev/iommu/busdma_iommu.c |  4 ++--
 sys/dev/iommu/iommu.h|  9 +++--
 sys/dev/iommu/iommu_gas.c| 44 +---
 sys/x86/iommu/intel_ctx.c| 13 -
 sys/x86/iommu/intel_qi.c | 10 +++---
 6 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/sys/arm64/iommu/iommu.c b/sys/arm64/iommu/iommu.c
index 0080ab4ff316..d24cad94e966 100644
--- a/sys/arm64/iommu/iommu.c
+++ b/sys/arm64/iommu/iommu.c
@@ -410,16 +410,10 @@ iommu_free_ctx(struct iommu_ctx *ioctx)
 static void
 iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
 {
-   struct iommu_domain *iodom;
-
-   iodom = entry->domain;
-
-   IOMMU_DOMAIN_LOCK(iodom);
-   iommu_gas_free_space(iodom, entry);
-   IOMMU_DOMAIN_UNLOCK(iodom);
+   iommu_gas_free_space(entry);
 
if (free)
-   iommu_gas_free_entry(iodom, entry);
+   iommu_gas_free_entry(entry);
else
entry->flags = 0;
 }
diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 67e82fe43e58..8f63d8b47f19 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -1040,7 +1040,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
M_WAITOK : M_NOWAIT);
if (ma == NULL) {
-   iommu_gas_free_entry(domain, entry);
+   iommu_gas_free_entry(entry);
return (ENOMEM);
}
for (i = 0; i < atop(length); i++) {
@@ -1055,7 +1055,7 @@ bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
IOMMU_DMAMAP_UNLOCK(map);
} else {
-   iommu_gas_free_entry(domain, entry);
+   iommu_gas_free_entry(entry);
}
for (i = 0; i < atop(length); i++)
vm_page_putfake(ma[i]);
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index fefd0f615be5..ae4022c5c4f7 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -169,15 +169,12 @@ void iommu_gas_init_domain(struct iommu_domain *domain);
 void iommu_gas_fini_domain(struct iommu_domain *domain);
 struct iommu_map_entry *iommu_gas_alloc_entry(struct iommu_domain *domain,
 u_int flags);
-void iommu_gas_free_entry(struct iommu_domain *domain,
-struct iommu_map_entry *entry);
-void iommu_gas_free_space(struct iommu_domain *domain,
-struct iommu_map_entry *entry);
+void iommu_gas_free_entry(struct iommu_map_entry *entry);
+void iommu_gas_free_space(struct iommu_map_entry *entry);
 int iommu_gas_map(struct iommu_domain *domain,
 const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
 u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res);
-void iommu_gas_free_region(struct iommu_domain *domain,
-struct iommu_map_entry *entry);
+void iommu_gas_free_region(struct iommu_map_entry *entry);
 int iommu_gas_map_region(struct iommu_domain *domain,
 struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma);
 int iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index bac15edcf849..bad56ab9140e 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -107,12 +107,11 @@ iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
 }
 
 void
-iommu_gas_free_entry(struct iommu_domain *domain, struct iommu_map_entry *entry)
+iommu_gas_free_entry(struct iommu_map_entry *entry)
 {
+   struct iommu_domain *domain;
 
-   KASSERT(domain == entry->domain,
-   ("mismatched free domain %p entry %

git: 7f46deccbed7 - main - x86/iommu: Reduce the number of queued invalidation interrupts

2022-08-06 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7f46deccbed74436b62f8fd02655ff4ad89f1023

commit 7f46deccbed74436b62f8fd02655ff4ad89f1023
Author: Alan Cox 
AuthorDate: 2022-07-31 19:28:30 +
Commit: Alan Cox 
CommitDate: 2022-08-06 18:05:58 +

x86/iommu: Reduce the number of queued invalidation interrupts

Restructure dmar_qi_task() so as to reduce the number of invalidation
completion interrupts.  Specifically, because processing completed
invalidations in dmar_qi_task() can take quite some time, don't reenable
completion interrupts until processing has completed a first time. Then,
check a second time after reenabling completion interrupts, so that
any invalidations that complete just before interrupts are reenabled
do not linger until a future invalidation might raise an interrupt.
(Recent changes have made checking for completed invalidations cheap; no
locking is required.)

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D36054
---
 sys/x86/iommu/intel_qi.c | 45 +
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
index baaf5b472a2c..8a8e656083e3 100644
--- a/sys/x86/iommu/intel_qi.c
+++ b/sys/x86/iommu/intel_qi.c
@@ -411,14 +411,34 @@ dmar_qi_intr(void *arg)
return (FILTER_HANDLED);
 }
 
+static void
+dmar_qi_drain_tlb_flush(struct dmar_unit *unit)
+{
+   struct iommu_map_entry *entry, *head;
+
+   for (head = unit->tlb_flush_head;; head = entry) {
+   entry = (struct iommu_map_entry *)
+   atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
+   if (entry == NULL ||
+   !dmar_qi_seq_processed(unit, &entry->gseq))
+   break;
+   unit->tlb_flush_head = entry;
+   iommu_gas_free_entry(head);
+   if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
+   iommu_gas_free_region(entry);
+   else
+   iommu_gas_free_space(entry);
+   }
+}
+
 static void
 dmar_qi_task(void *arg, int pending __unused)
 {
struct dmar_unit *unit;
-   struct iommu_map_entry *entry, *head;
uint32_t ics;
 
unit = arg;
+   dmar_qi_drain_tlb_flush(unit);
 
/*
 * Request an interrupt on the completion of the next invalidation
@@ -428,23 +448,16 @@ dmar_qi_task(void *arg, int pending __unused)
if ((ics & DMAR_ICS_IWC) != 0) {
ics = DMAR_ICS_IWC;
dmar_write4(unit, DMAR_ICS_REG, ics);
-   }
 
-   for (;;) {
-   head = unit->tlb_flush_head;
-   entry = (struct iommu_map_entry *)
-   atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
-   if (entry == NULL)
-   break;
-   if (!dmar_qi_seq_processed(unit, &entry->gseq))
-   break;
-   unit->tlb_flush_head = entry;
-   iommu_gas_free_entry(head);
-   if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
-   iommu_gas_free_region(entry);
-   else
-   iommu_gas_free_space(entry);
+   /*
+* Drain a second time in case the DMAR processes an entry
+* after the first call and before clearing DMAR_ICS_IWC.
+* Otherwise, such entries will linger until a later entry
+* that requests an interrupt is processed.
+*/
+   dmar_qi_drain_tlb_flush(unit);
}
+
if (unit->inv_seq_waiters > 0) {
/*
 * Acquire the DMAR lock so that wakeup() is called only after



git: 34eeabff5a86 - main - amd64/arm64 pmap: Stop requiring the accessed bit for superpage promotion

2023-06-12 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=34eeabff5a8636155bb02985c5928c1844fd3178

commit 34eeabff5a8636155bb02985c5928c1844fd3178
Author: Alan Cox 
AuthorDate: 2023-05-31 23:10:41 +
Commit: Alan Cox 
CommitDate: 2023-06-12 18:40:57 +

amd64/arm64 pmap: Stop requiring the accessed bit for superpage promotion

Stop requiring all of the PTEs to have the accessed bit set for superpage
promotion to occur.  Given that change, add support for promotion to
pmap_enter_quick(), which does not set the accessed bit in the PTE that
it creates.

Since the final mapping within a superpage-aligned and sized region of a
memory-mapped file is typically created by a call to pmap_enter_quick(),
we now achieve promotions in circumstances where they did not occur
before, for example, the X server's read-only mapping of libLLVM-15.so.

See also https://www.usenix.org/system/files/atc20-zhu-weixi_0.pdf

Reviewed by:kib, markj
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D40478
---
 sys/amd64/amd64/pmap.c   | 154 +++
 sys/amd64/include/pmap.h |   2 +-
 sys/arm64/arm64/pmap.c   | 122 +++--
 3 files changed, 205 insertions(+), 73 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 123811ed573f..3cb02a4f9daa 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -313,6 +313,33 @@ pmap_pku_mask_bit(pmap_t pmap)
return (pmap->pm_type == PT_X86 ? X86_PG_PKU_MASK : 0);
 }
 
+static __inline boolean_t
+safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
+{
+
+   if (!pmap_emulate_ad_bits(pmap))
+   return (TRUE);
+
+   KASSERT(pmap->pm_type == PT_EPT, ("invalid pm_type %d", pmap->pm_type));
+
+   /*
+* XWR = 010 or 110 will cause an unconditional EPT misconfiguration
+* so we don't let the referenced (aka EPT_PG_READ) bit to be cleared
+* if the EPT_PG_WRITE bit is set.
+*/
+   if ((pte & EPT_PG_WRITE) != 0)
+   return (FALSE);
+
+   /*
+* XWR = 100 is allowed only if the PMAP_SUPPORTS_EXEC_ONLY is set.
+*/
+   if ((pte & EPT_PG_EXECUTE) == 0 ||
+   ((pmap->pm_flags & PMAP_SUPPORTS_EXEC_ONLY) != 0))
+   return (TRUE);
+   else
+   return (FALSE);
+}
+
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
 #define PMAP_INLINE__attribute__((__gnu_inline__)) inline
@@ -1279,7 +1306,8 @@ static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
-static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool allpte_PG_A_set);
 static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
 vm_offset_t eva);
 static void pmap_invalidate_cache_range_all(vm_offset_t sva,
@@ -2491,7 +2519,7 @@ pmap_init(void)
 */
if ((i == 0 ||
kernphys + ((vm_paddr_t)(i - 1) << PDRSHIFT) < KERNend) &&
-   pmap_insert_pt_page(kernel_pmap, mpte, false))
+   pmap_insert_pt_page(kernel_pmap, mpte, false, false))
panic("pmap_init: pmap_insert_pt_page failed");
}
PMAP_UNLOCK(kernel_pmap);
@@ -4061,14 +4089,26 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  *
- * If "promoted" is false, then the page table page "mpte" must be zero filled.
+ * If "promoted" is false, then the page table page "mpte" must be zero filled;
+ * "mpte"'s valid field will be set to 0.
+ *
+ * If "promoted" is true and "allpte_PG_A_set" is false, then "mpte" must
+ * contain valid mappings with identical attributes except for PG_A; "mpte"'s
+ * valid field will be set to 1.
+ *
+ * If "promoted" and "allpte_PG_A_set" are both true, then "mpte" must contain
+ * valid mappings with identical attributes including PG_A; "mpte"'s valid
+ * field will be set to VM_PAGE_BITS_ALL.
  */
 static __inline int
-pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool allpte_PG_A_set)
 {
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-   mpte->valid = promoted ? VM_PAG
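
The hunk is truncated at the assignment to "mpte"'s valid field, but the new
comment above spells out the intended values.  The following is a hedged
reconstruction of that assignment, inferred from the comment rather than
copied from the (not shown) committed line:

	/*
	 * Inferred from the comment above: not promoted -> 0, promoted
	 * without allpte_PG_A_set -> 1, promoted with allpte_PG_A_set ->
	 * VM_PAGE_BITS_ALL.
	 */
	mpte->valid = promoted ? (allpte_PG_A_set ? VM_PAGE_BITS_ALL : 1) : 0;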

git: 58d427172157 - main - vm_phys: Fix typo in 9e8174289236

2023-06-16 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=58d427172157dedf82e46014e7d19cf973186dd9

commit 58d427172157dedf82e46014e7d19cf973186dd9
Author: Alan Cox 
AuthorDate: 2023-06-16 08:12:42 +
Commit: Alan Cox 
CommitDate: 2023-06-16 08:12:42 +

vm_phys: Fix typo in 9e8174289236
---
 sys/vm/vm_phys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index a0b53f0f7c4b..28f12231e01c 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -1246,7 +1246,7 @@ vm_phys_find_range(vm_page_t bounds[], int segind, int domain,
struct vm_phys_seg *end_seg, *seg;
 
KASSERT(npages > 0, ("npages is zero"));
-   KASSERT(domain >= 0 && domain < vm_ndomain, ("domain out of range"));
+   KASSERT(domain >= 0 && domain < vm_ndomains, ("domain out of range"));
end_seg = &vm_phys_segs[vm_phys_nsegs];
for (seg = &vm_phys_segs[segind]; seg < end_seg; seg++) {
if (seg->domain != domain)



git: 0d2f98c2f092 - main - amd64 pmap: Tidy up pmap_promote_pde() calls

2023-06-24 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=0d2f98c2f0928a8ee8446c3e5e0e4fb93f1dd9c5

commit 0d2f98c2f0928a8ee8446c3e5e0e4fb93f1dd9c5
Author: Alan Cox 
AuthorDate: 2023-06-17 17:18:33 +
Commit: Alan Cox 
CommitDate: 2023-06-24 18:09:04 +

amd64 pmap: Tidy up pmap_promote_pde() calls

Since pmap_ps_enabled() is true by default, check it inside of
pmap_promote_pde() instead of at every call site.

Modify pmap_promote_pde() to return true if the promotion succeeded and
false otherwise.  Use this return value in a couple places.

Reviewed by:kib, markj
Differential Revision:  https://reviews.freebsd.org/D40744
---
 sys/amd64/amd64/pmap.c | 36 ++--
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 3cb02a4f9daa..3215a7f8d559 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -436,7 +436,7 @@ pt_entry_t pg_nx;
 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
 "VM/pmap parameters");
 
-static int pg_ps_enabled = 1;
+static int __read_frequently pg_ps_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
 &pg_ps_enabled, 0, "Are large page mappings enabled?");
 
@@ -1318,7 +1318,7 @@ static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static vm_page_t pmap_large_map_getptp_unlocked(void);
 static vm_paddr_t pmap_large_map_kextract(vm_offset_t va);
 #if VM_NRESERVLEVEL > 0
-static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+static bool pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
 vm_page_t mpte, struct rwlock **lockp);
 #endif
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
@@ -6856,7 +6856,7 @@ pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
  * aligned, contiguous physical memory and (2) the 4KB page mappings must have
  * identical characteristics. 
  */
-static void
+static bool
 pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte,
 struct rwlock **lockp)
 {
@@ -6865,6 +6865,10 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte,
pt_entry_t allpte_PG_A, PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
int PG_PTE_CACHE;
 
+   PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+   if (!pmap_ps_enabled(pmap))
+   return (false);
+
PG_A = pmap_accessed_bit(pmap);
PG_G = pmap_global_bit(pmap);
PG_M = pmap_modified_bit(pmap);
@@ -6873,8 +6877,6 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte,
PG_PKU_MASK = pmap_pku_mask_bit(pmap);
PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
 
-   PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-
/*
 * Examine the first PTE in the specified PTP.  Abort if this PTE is
 * ineligible for promotion due to hardware errata, invalid, or does
@@ -6883,12 +6885,12 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte,
firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
newpde = *firstpte;
if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, newpde)))
-   return;
+   return (false);
if ((newpde & ((PG_FRAME & PDRMASK) | PG_V)) != PG_V) {
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
 
/*
@@ -6933,7 +6935,7 @@ setpde:
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
 setpte:
if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
@@ -6952,7 +6954,7 @@ setpte:
counter_u64_add(pmap_pde_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
allpte_PG_A &= oldpte;
pa -= PAGE_SIZE;
@@ -6993,7 +6995,7 @@ setpte:
CTR2(KTR_PMAP,
"pmap_promote_pde: failure for va %#lx in pmap %p", va,
pmap);
-   return;
+   return (false);
}
 
/*
@@ -7018,6 +7020,7 @@ setpte:
counter_u64_add(pmap_pde_promotions, 1);
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#l

git: d8e6f4946cec - main - vm: Fix anonymous memory clustering under ASLR

2023-06-26 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=d8e6f4946cec0b84a6997d62e791b8cf993741b2

commit d8e6f4946cec0b84a6997d62e791b8cf993741b2
Author: Alan Cox 
AuthorDate: 2023-06-23 17:00:32 +
Commit: Alan Cox 
CommitDate: 2023-06-27 04:42:48 +

vm: Fix anonymous memory clustering under ASLR

By default, our ASLR implementation is supposed to cluster anonymous
memory allocations, unless the application's mmap(..., MAP_ANON, ...)
call included a non-zero address hint.  Unfortunately, clustering
never occurred because kern_mmap() always replaced the given address
hint when it was zero.  So, the ASLR implementation always believed
that a non-zero hint had been provided and randomized the mapping's
location in the address space.  To fix this problem, I'm pushing down
the point at which we convert a hint of zero to the minimum allocatable
address from kern_mmap() to vm_map_find_min().

Reviewed by:kib
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D40743
---
 sys/vm/vm_map.c  | 10 +++---
 sys/vm/vm_map.h  |  1 +
 sys/vm/vm_mmap.c |  8 +---
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index f5863a9b9939..a02107b5e64d 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1981,14 +1981,14 @@ SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW,
 "Cluster anonymous mappings: 0 = no, 1 = yes if no hint, 2 = always");
 
 static bool
-clustering_anon_allowed(vm_offset_t addr)
+clustering_anon_allowed(vm_offset_t addr, int cow)
 {
 
switch (cluster_anon) {
case 0:
return (false);
case 1:
-   return (addr == 0);
+   return (addr == 0 || (cow & MAP_NO_HINT) != 0);
case 2:
default:
return (true);
@@ -2111,7 +2111,7 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
} else
alignment = 0;
en_aslr = (map->flags & MAP_ASLR) != 0;
-   update_anon = cluster = clustering_anon_allowed(*addr) &&
+   update_anon = cluster = clustering_anon_allowed(*addr, cow) &&
(map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 &&
find_space != VMFS_NO_SPACE && object == NULL &&
(cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP |
@@ -2255,6 +2255,10 @@ vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
int rv;
 
hint = *addr;
+   if (hint == 0)
+   cow |= MAP_NO_HINT;
+   if (hint < min_addr)
+   *addr = hint = min_addr;
for (;;) {
rv = vm_map_find(map, object, offset, addr, length, max_addr,
find_space, prot, max, cow);
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 2ac54a39a57b..fd8b606e8ddc 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -383,6 +383,7 @@ long vmspace_resident_count(struct vmspace *vmspace);
 #defineMAP_CREATE_STACK_GAP_DN 0x0002
 #defineMAP_VN_EXEC 0x0004
 #defineMAP_SPLIT_BOUNDARY_MASK 0x0018
+#defineMAP_NO_HINT 0x0020
 
 #defineMAP_SPLIT_BOUNDARY_SHIFT 19
 
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 56345fcaf560..408e077476dd 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -353,10 +353,12 @@ kern_mmap(struct thread *td, const struct mmap_req *mrp)
 * the hint would fall in the potential heap space,
 * place it after the end of the largest possible heap.
 *
-* There should really be a pmap call to determine a reasonable
-* location.
+* For anonymous mappings within the address space of the
+* calling process, the absence of a hint is handled at a
+* lower level in order to implement different clustering
+* strategies for ASLR.
 */
-   if (addr == 0 ||
+   if (((flags & MAP_ANON) == 0 && addr == 0) ||
(addr >= round_page((vm_offset_t)vms->vm_taddr) &&
addr < round_page((vm_offset_t)vms->vm_daddr +
lim_max(td, RLIMIT_DATA
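
In userland terms, the behavior this change enables can be illustrated as
follows.  This is a usage sketch of the semantics the commit message
describes, not code from the tree; the mapping sizes and the hint address
are arbitrary.

#include <sys/mman.h>
#include <stddef.h>

void
example(void)
{
	/*
	 * No hint: with ASLR enabled and the default policy (cluster when
	 * no hint is given), this anonymous mapping may now be clustered
	 * near other anonymous mappings, since the kernel sees MAP_NO_HINT.
	 */
	void *a = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);

	/*
	 * Non-zero hint: treated as an explicit placement preference, so
	 * under ASLR the mapping's location is randomized rather than
	 * clustered.
	 */
	void *b = mmap((void *)0x700000000000UL, 1 << 20,
	    PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);

	(void)a;
	(void)b;
}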



git: 3767de839742 - main - arm64 pmap: Tidy up pmap_promote_l2() calls

2023-06-28 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3767de83974206e4267dabf7fbe66b151c1a0b14

commit 3767de83974206e4267dabf7fbe66b151c1a0b14
Author: Alan Cox 
AuthorDate: 2023-06-28 07:08:02 +
Commit: Alan Cox 
CommitDate: 2023-06-28 17:46:15 +

arm64 pmap: Tidy up pmap_promote_l2() calls

Since pmap_ps_enabled() is true by default, check it inside of
pmap_promote_l2() instead of at every call site.

Modify pmap_promote_l2() to return true if the promotion succeeded and
false otherwise.

(A similar change was applied to the amd64 pmap in 0d2f98c2f092.)

Reviewed by:kib, markj
Differential Revision:  https://reviews.freebsd.org/D40781
---
 sys/arm64/arm64/pmap.c | 41 ++---
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 3166b3d7959b..46520889728f 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -435,7 +435,7 @@ void (*pmap_stage2_invalidate_all)(uint64_t);
 #define TLBI_VA(addr)   (((addr) >> TLBI_VA_SHIFT) & TLBI_VA_MASK)
 #defineTLBI_VA_L3_INCR (L3_SIZE >> TLBI_VA_SHIFT)
 
-static int superpages_enabled = 1;
+static int __read_frequently superpages_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
 CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
 "Are large page mappings enabled?");
@@ -4141,14 +4141,21 @@ pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
  * aligned, contiguous physical memory and (2) the 4KB page mappings must have
  * identical characteristics.
  */
-static void
+static bool
 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,
 struct rwlock **lockp)
 {
pt_entry_t all_l3e_AF, *firstl3, *l3, newl2, oldl3, pa;
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-   PMAP_ASSERT_STAGE1(pmap);
+
+   /*
+* Currently, this function only supports promotion on stage 1 pmaps
+* because it tests stage 1 specific fields and performs a break-
+* before-make sequence that is incorrect for stage 2 pmaps.
+*/
+   if (pmap->pm_stage != PM_STAGE1 || !pmap_ps_enabled(pmap))
+   return (false);
 
/*
 * Examine the first L3E in the specified PTP.  Abort if this L3E is
@@ -4157,14 +4164,14 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,
firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2)));
newl2 = pmap_load(firstl3);
if ((newl2 & ATTR_SW_NO_PROMOTE) != 0)
-   return;
+   return (false);
/* ... is not the first physical page within an L2 block */
if ((PTE_TO_PHYS(newl2) & L2_OFFSET) != 0 ||
((newl2 & ATTR_DESCR_MASK) != L3_PAGE)) { /* ... or is invalid */
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
 
/*
@@ -4212,7 +4219,7 @@ setl2:
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
 setl3:
if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
@@ -4232,7 +4239,7 @@ setl3:
atomic_add_long(&pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
-   return;
+   return (false);
}
all_l3e_AF &= oldl3;
pa -= PAGE_SIZE;
@@ -4263,7 +4270,7 @@ setl3:
CTR2(KTR_PMAP,
"pmap_promote_l2: failure for va %#lx in pmap %p", va,
pmap);
-   return;
+   return (false);
}
 
if ((newl2 & ATTR_SW_MANAGED) != 0)
@@ -4277,6 +4284,7 @@ setl3:
atomic_add_long(&pmap_l2_promotions, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
pmap);
+   return (true);
 }
 #endif /* VM_NRESERVLEVEL > 0 */
 
@@ -4681,17 +4689,13 @@ validate:
 
 #if VM_NRESERVLEVEL > 0
/*
-* Try to promote from level 3 pages to a level 2 superpage. This
-* currently only works on stage 1 pmaps as pmap_promote_l2 looks at
-* stage 1 specific fields and performs a break-before-make sequence
-* that is incorrect a stage 2 pmap.
+

git: e59d202312f9 - main - arm64: make VM_NFREEORDER and the comment describing it match

2023-06-29 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e59d202312f9868583c6603030ded2476085920d

commit e59d202312f9868583c6603030ded2476085920d
Author: Alan Cox 
AuthorDate: 2023-06-28 08:23:09 +
Commit: Alan Cox 
CommitDate: 2023-06-29 17:48:48 +

arm64: make VM_NFREEORDER and the comment describing it match

The setting of VM_NFREEORDER and the comment describing it were copied
from sparc64 where both the page size and the number of page table
entries that fit in a cache line are different from arm64.

Reviewed by:andrew, kib, markj
MFC after:  2 weeks
Differential Revision:  https://reviews.freebsd.org/D40782
---
 sys/arm64/include/vmparam.h | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h
index b28a79256453..23b7d0d87c94 100644
--- a/sys/arm64/include/vmparam.h
+++ b/sys/arm64/include/vmparam.h
@@ -89,14 +89,15 @@
 #defineVM_FREELIST_DEFAULT 0
 
 /*
- * An allocation size of 16MB is supported in order to optimize the
- * use of the direct map by UMA.  Specifically, a cache line contains
- * at most four TTEs, collectively mapping 16MB of physical memory.
- * By reducing the number of distinct 16MB "pages" that are used by UMA,
- * the physical memory allocator reduces the likelihood of both 4MB
- * page TLB misses and cache misses caused by 4MB page TLB misses.
+ * When PAGE_SIZE is 4KB, an allocation size of 16MB is supported in order
+ * to optimize the use of the direct map by UMA.  Specifically, a 64-byte
+ * cache line contains at most 8 L2 BLOCK entries, collectively mapping 16MB
+ * of physical memory.  By reducing the number of distinct 16MB "pages" that
+ * are used by UMA, the physical memory allocator reduces the likelihood of
+ * both 2MB page TLB misses and cache misses during the page table walk when
+ * a 2MB page TLB miss does occur.
  */
-#defineVM_NFREEORDER   12
+#defineVM_NFREEORDER   13
 
 /*
  * Enable superpage reservations: 1 level.
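
As a sanity check on the new value, assuming (as on other platforms) that
the largest vm_phys free-list block is 2^(VM_NFREEORDER - 1) pages:

	/*
	 * old arm64:   4 KB << (12 - 1) =  8 MB  (the comment claimed 16 MB)
	 * new arm64:   4 KB << (13 - 1) = 16 MB  (matches the comment)
	 * sparc64 was: 8 KB << (12 - 1) = 16 MB  (where the text was copied from)
	 */
	_Static_assert((4096UL << (13 - 1)) == 16UL * 1024 * 1024,
	    "VM_NFREEORDER 13 with 4 KB pages yields 16 MB maximum blocks");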



git: 294c52d969df - main - amd64 pmap: Fix compilation when superpage reservations are disabled

2023-07-12 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=294c52d969dfdaf1d9b3f4a1de76b702ee724afc

commit 294c52d969dfdaf1d9b3f4a1de76b702ee724afc
Author: Yufeng Zhou 
AuthorDate: 2023-07-12 07:52:02 +
Commit: Alan Cox 
CommitDate: 2023-07-12 17:07:42 +

amd64 pmap: Fix compilation when superpage reservations are disabled

The function pmap_pde_ept_executable() should not be conditionally
compiled based on VM_NRESERVLEVEL. It is required indirectly by
pmap_enter(..., psind=1) even when reservation-based allocation is
disabled at compile time.

Reviewed by:alc
MFC after:  1 week
---
 sys/amd64/amd64/pmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 3215a7f8d559..896078f3c456 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6839,7 +6839,6 @@ retry:
PMAP_UNLOCK(pmap);
 }
 
-#if VM_NRESERVLEVEL > 0
 static bool
 pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
 {
@@ -6849,6 +6848,7 @@ pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
return ((pde & EPT_PG_EXECUTE) != 0);
 }
 
+#if VM_NRESERVLEVEL > 0
 /*
  * Tries to promote the 512, contiguous 4KB page mappings that are within a
  * single page table page (PTP) to a single 2MB page mapping.  For promotion



git: 29edff0dea0f - main - arm64/riscv pmap: Initialize the pmap's pm_pvchunk field

2023-07-21 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=29edff0dea0f7a2df710dd649d0cbcd4a2da3692

commit 29edff0dea0f7a2df710dd649d0cbcd4a2da3692
Author: Alan Cox 
AuthorDate: 2023-07-16 20:58:04 +
Commit: Alan Cox 
CommitDate: 2023-07-22 04:58:18 +

arm64/riscv pmap: Initialize the pmap's pm_pvchunk field

I believe that there are two reasons that the missing TAILQ
initialization operations haven't caused a problem.  First, the TAILQ
head's first field is being initialized to zeroes elsewhere.  Second,
the first access to the TAILQ head's last field is by
TAILQ_INSERT_HEAD(), which assigns to the last field without reading
it when the first field is NULL.

Reviewed by:kib, markj
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D41118
---
 sys/arm64/arm64/pmap.c | 3 +++
 sys/riscv/riscv/pmap.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index c2681104c961..b2591437b3b3 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1291,6 +1291,7 @@ pmap_bootstrap(vm_paddr_t kernstart, vm_size_t kernlen)
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_l0_paddr =
pmap_early_vtophys((vm_offset_t)kernel_pmap_store.pm_l0);
+   TAILQ_INIT(&kernel_pmap->pm_pvchunk);
vm_radix_init(&kernel_pmap->pm_root);
kernel_pmap->pm_cookie = COOKIE_FROM(-1, INT_MIN);
kernel_pmap->pm_stage = PM_STAGE1;
@@ -2270,6 +2271,7 @@ pmap_pinit0(pmap_t pmap)
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
pmap->pm_l0_paddr = READ_SPECIALREG(ttbr0_el1);
pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
+   TAILQ_INIT(&pmap->pm_pvchunk);
vm_radix_init(&pmap->pm_root);
pmap->pm_cookie = COOKIE_FROM(ASID_RESERVED_FOR_PID_0, INT_MIN);
pmap->pm_stage = PM_STAGE1;
@@ -2293,6 +2295,7 @@ pmap_pinit_stage(pmap_t pmap, enum pmap_stage stage, int levels)
pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(m);
pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
 
+   TAILQ_INIT(&pmap->pm_pvchunk);
vm_radix_init(&pmap->pm_root);
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX);
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 7580f091ad86..3732eea14f7d 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -646,6 +646,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
/* Set this early so we can use the pagetable walking functions */
kernel_pmap_store.pm_top = (pd_entry_t *)l1pt;
PMAP_LOCK_INIT(kernel_pmap);
+   TAILQ_INIT(&kernel_pmap->pm_pvchunk);
vm_radix_init(&kernel_pmap->pm_root);
 
rw_init(&pvh_global_lock, "pmap pv global");
@@ -1327,6 +1328,7 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_satp = pmap_satp_mode() |
(vtophys(pmap->pm_top) >> PAGE_SHIFT);
CPU_ZERO(&pmap->pm_active);
+   TAILQ_INIT(&pmap->pm_pvchunk);
vm_radix_init(&pmap->pm_root);
pmap_activate_boot(pmap);
 }
@@ -1369,6 +1371,7 @@ pmap_pinit(pmap_t pmap)
pmap->pm_top[i] = kernel_pmap->pm_top[i];
}
 
+   TAILQ_INIT(&pmap->pm_pvchunk);
vm_radix_init(&pmap->pm_root);
 
return (1);
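
The two reasons given in the commit message can be read off the queue(3)
macros.  A TAILQ head is the pair { tqh_first, tqh_last }, and
TAILQ_INSERT_HEAD() only writes tqh_last, without ever reading it, when
tqh_first is NULL.  The following is a simplified paraphrase of the macro
from <sys/queue.h> (debug hooks omitted), not a new primitive:

#define	MY_TAILQ_INSERT_HEAD(head, elm, field) do {			\
	/* Link the new element in front of the current first element. */ \
	if (((elm)->field.tqe_next = (head)->tqh_first) != NULL)	\
		(head)->tqh_first->field.tqe_prev =			\
		    &(elm)->field.tqe_next;				\
	else								\
		/* Empty queue: tqh_last is assigned without being read, */ \
		/* which is why a merely zero-filled head went unnoticed. */ \
		(head)->tqh_last = &(elm)->field.tqe_next;		\
	(head)->tqh_first = (elm);					\
	(elm)->field.tqe_prev = &(head)->tqh_first;			\
} while (0)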



git: 0aebcfc9f4d6 - main - arm64 pmap: Eliminate some duplication of code

2023-07-22 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=0aebcfc9f4d642a8bef95504dc928fab78af33bf

commit 0aebcfc9f4d642a8bef95504dc928fab78af33bf
Author: Alan Cox 
AuthorDate: 2023-07-22 17:41:49 +
Commit: Alan Cox 
CommitDate: 2023-07-23 05:34:17 +

arm64 pmap: Eliminate some duplication of code

pmap_unmapbios() can simply call pmap_kremove_device() rather than
duplicating its code.

While I'm here, add a comment to pmap_kremove_device() explaining its
proper use, and fix a whitespace issue.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index b2591437b3b3..dfed0142f273 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -2032,6 +2032,13 @@ pmap_kremove(vm_offset_t va)
pmap_s1_invalidate_page(kernel_pmap, va, true);
 }
 
+/*
+ * Remove the specified range of mappings from the kernel address space.
+ *
+ * Should only be applied to mappings that were created by pmap_kenter() or
+ * pmap_kenter_device().  Nothing about this function is actually specific
+ * to device mappings.
+ */
 void
 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 {
@@ -2039,7 +2046,7 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size)
vm_offset_t va;
 
KASSERT((sva & L3_OFFSET) == 0,
-  ("pmap_kremove_device: Invalid virtual address"));
+   ("pmap_kremove_device: Invalid virtual address"));
KASSERT((size & PAGE_MASK) == 0,
("pmap_kremove_device: Mapping is not page-sized"));
 
@@ -6550,7 +6557,7 @@ void
 pmap_unmapbios(void *p, vm_size_t size)
 {
struct pmap_preinit_mapping *ppim;
-   vm_offset_t offset, tmpsize, va, va_trunc;
+   vm_offset_t offset, va, va_trunc;
pd_entry_t *pde;
pt_entry_t *l2;
int i, lvl, l2_blocks, block;
@@ -6600,14 +6607,8 @@ pmap_unmapbios(void *p, vm_size_t size)
size = round_page(offset + size);
va = trunc_page(va);
 
-   pde = pmap_pde(kernel_pmap, va, &lvl);
-   KASSERT(pde != NULL,
-   ("pmap_unmapbios: Invalid page entry, va: 0x%lx", va));
-   KASSERT(lvl == 2, ("pmap_unmapbios: Invalid level %d", lvl));
-
/* Unmap and invalidate the pages */
-for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
-   pmap_kremove(va + tmpsize);
+   pmap_kremove_device(va, size);
 
kva_free(va, size);
}



git: 7b1e606c7222 - main - arm64 pmap: Retire PMAP_INLINE

2023-07-22 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7b1e606c7acdaea613924f566ffe9b65c068

commit 7b1e606c7acdaea613924f566ffe9b65c068
Author: Alan Cox 
AuthorDate: 2023-07-22 17:55:43 +
Commit: Alan Cox 
CommitDate: 2023-07-23 05:34:17 +

arm64 pmap: Retire PMAP_INLINE

Neither of the remaining callers to pmap_kremove() warrants inlining.
Those calls rarely occur.  In other words, we were optimizing for the
uncommon case.

MFC after:  1 week
---
 sys/arm64/arm64/pmap.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index dfed0142f273..379296f375ae 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -170,16 +170,6 @@ __FBSDID("$FreeBSD$");
 #defineNUL1E   (NUL0E * NL1PG)
 #defineNUL2E   (NUL1E * NL2PG)
 
-#if !defined(DIAGNOSTIC)
-#ifdef __GNUC_GNU_INLINE__
-#define PMAP_INLINE__attribute__((__gnu_inline__)) inline
-#else
-#define PMAP_INLINEextern inline
-#endif
-#else
-#define PMAP_INLINE
-#endif
-
 #ifdef PV_STATS
 #define PV_STAT(x) do { x ; } while (0)
 #define __pvused
@@ -2022,7 +2012,7 @@ pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
 /*
  * Remove a page from the kernel pagetables.
  */
-PMAP_INLINE void
+void
 pmap_kremove(vm_offset_t va)
 {
pt_entry_t *pte;



git: 50d663b14b31 - main - vm: Fix vm_map_find_min()

2023-07-25 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=50d663b14b310d6020b4b6cc92d4fae985f086f2

commit 50d663b14b310d6020b4b6cc92d4fae985f086f2
Author: Alan Cox 
AuthorDate: 2023-07-25 07:24:19 +
Commit: Alan Cox 
CommitDate: 2023-07-26 05:24:50 +

vm: Fix vm_map_find_min()

Fix the handling of address hints that are less than min_addr by
vm_map_find_min().

Reported by:dchagin
Reviewed by:kib
Fixes:  d8e6f4946cec0 "vm: Fix anonymous memory clustering under ASLR"
Differential Revision:  https://reviews.freebsd.org/D41159
---
 sys/vm/vm_map.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 444e09986d4e..eb607d519247 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2255,10 +2255,10 @@ vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
int rv;
 
hint = *addr;
-   if (hint == 0)
+   if (hint == 0) {
cow |= MAP_NO_HINT;
-   if (hint < min_addr)
*addr = hint = min_addr;
+   }
for (;;) {
rv = vm_map_find(map, object, offset, addr, length, max_addr,
find_space, prot, max, cow);



git: a98a0090b2ba - main - arm64 pmap: Eliminate unnecessary TLB invalidations

2023-07-25 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=a98a0090b2ba64ff0bc3cf71a00fb5f9e31fc1d3

commit a98a0090b2ba64ff0bc3cf71a00fb5f9e31fc1d3
Author: Alan Cox 
AuthorDate: 2023-07-23 07:11:43 +
Commit: Alan Cox 
CommitDate: 2023-07-26 05:37:13 +

arm64 pmap: Eliminate unnecessary TLB invalidations

Eliminate unnecessary TLB invalidations by pmap_kenter(),
pmap_qenter(), and pmap_mapbios() when the old page table entries
were invalid.

While I'm here, correct some nearby whitespace issues.

MFC after:  2 weeks
---
 sys/arm64/arm64/pmap.c | 49 ++---
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 379296f375ae..fa09d2026550 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1972,19 +1972,20 @@ void
 pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
 {
pd_entry_t *pde;
-   pt_entry_t *pte, attr;
+   pt_entry_t attr, old_l3e, *pte;
vm_offset_t va;
int lvl;
 
KASSERT((pa & L3_OFFSET) == 0,
-  ("pmap_kenter: Invalid physical address"));
+   ("pmap_kenter: Invalid physical address"));
KASSERT((sva & L3_OFFSET) == 0,
-  ("pmap_kenter: Invalid virtual address"));
+   ("pmap_kenter: Invalid virtual address"));
KASSERT((size & PAGE_MASK) == 0,
("pmap_kenter: Mapping is not page-sized"));
 
attr = ATTR_DEFAULT | ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN |
ATTR_S1_IDX(mode) | L3_PAGE;
+   old_l3e = 0;
va = sva;
while (size != 0) {
pde = pmap_pde(kernel_pmap, va, &lvl);
@@ -1993,13 +1994,21 @@ pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t 
pa, int mode)
KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));
 
pte = pmap_l2_to_l3(pde, va);
-   pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
+   old_l3e |= pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
 
va += PAGE_SIZE;
pa += PAGE_SIZE;
size -= PAGE_SIZE;
}
-   pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
+   if ((old_l3e & ATTR_DESCR_VALID) != 0)
+   pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
+   else {
+   /*
+* Because the old entries were invalid and the new mappings
+* are not executable, an isb is not required.
+*/
+   dsb(ishst);
+   }
 }
 
 void
@@ -2082,11 +2091,12 @@ void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
pd_entry_t *pde;
-   pt_entry_t *pte, pa, attr;
+   pt_entry_t attr, old_l3e, pa, *pte;
vm_offset_t va;
vm_page_t m;
int i, lvl;
 
+   old_l3e = 0;
va = sva;
for (i = 0; i < count; i++) {
pde = pmap_pde(kernel_pmap, va, &lvl);
@@ -2100,11 +2110,19 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
attr = ATTR_DEFAULT | ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN |
ATTR_S1_IDX(m->md.pv_memattr) | L3_PAGE;
pte = pmap_l2_to_l3(pde, va);
-   pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
+   old_l3e |= pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
 
va += L3_SIZE;
}
-   pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
+   if ((old_l3e & ATTR_DESCR_VALID) != 0)
+   pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
+   else {
+   /*
+* Because the old entries were invalid and the new mappings
+* are not executable, an isb is not required.
+*/
+   dsb(ishst);
+   }
 }
 
 /*
@@ -6441,7 +6459,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 {
struct pmap_preinit_mapping *ppim;
vm_offset_t va, offset;
-   pd_entry_t *pde;
+   pd_entry_t old_l2e, *pde;
pt_entry_t *l2;
int i, lvl, l2_blocks, free_l2_count, start_idx;
 
@@ -6501,6 +6519,7 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 
/* Map L2 blocks */
pa = rounddown2(pa, L2_SIZE);
+   old_l2e = 0;
for (i = 0; i < l2_blocks; i++) {
pde = pmap_pde(kernel_pmap, va, &lvl);
KASSERT(pde != NULL,
@@ -6511,14 +6530,22 @@ pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 
/* Insert L2_BLOCK */
l2 = pmap_l1_to_l2(pde, va);
-   pmap_load_store(l2,
+   old_l2e |= pmap_load_store(l2,
PHYS_TO_PTE(pa) | ATTR_DEFAULT | ATTR_S1_XN |
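
The pmap_mapbios() hunk is cut off above, but it applies the same pattern
that the pmap_kenter() and pmap_qenter() hunks show in full: accumulate the
previous PTE contents returned by pmap_load_store() and only issue a TLB
range invalidation if at least one of the old entries was valid.  A
condensed sketch of that pattern (illustrative, not the verbatim committed
code):

    old_l3e = 0;
    va = sva;
    while (size != 0) {
        pde = pmap_pde(kernel_pmap, va, &lvl);
        pte = pmap_l2_to_l3(pde, va);
        /* pmap_load_store() returns the previous PTE contents. */
        old_l3e |= pmap_load_store(pte, PHYS_TO_PTE(pa) | attr);
        va += PAGE_SIZE;
        pa += PAGE_SIZE;
        size -= PAGE_SIZE;
    }
    if ((old_l3e & ATTR_DESCR_VALID) != 0)
        pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
    else {
        /*
         * The old entries were invalid and the new mappings are not
         * executable, so a dsb(ishst) suffices; no isb is needed.
         */
        dsb(ishst);
    }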
 

git: 5ec2d94ade51 - main - vm_mmap_object: Update the spelling of true/false

2023-07-26 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5ec2d94ade51b2f2f129cf0c7f695582c7dccb81

commit 5ec2d94ade51b2f2f129cf0c7f695582c7dccb81
Author: Alan Cox 
AuthorDate: 2023-07-26 05:58:51 +
Commit: Alan Cox 
CommitDate: 2023-07-27 05:25:53 +

vm_mmap_object: Update the spelling of true/false

Since fitit is already a bool, use true/false instead of TRUE/FALSE.

MFC after:  2 weeks
---
 sys/vm/vm_mmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 408e077476dd..328fef007b1e 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1577,12 +1577,12 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, 
vm_size_t size, vm_prot_t prot,
return (EINVAL);
 
if ((flags & MAP_FIXED) == 0) {
-   fitit = TRUE;
+   fitit = true;
*addr = round_page(*addr);
} else {
if (*addr != trunc_page(*addr))
return (EINVAL);
-   fitit = FALSE;
+   fitit = false;
}
 
if (flags & MAP_ANON) {



git: 3d7c37425ee0 - main - amd64 pmap: Catch up with pctrie changes

2023-07-28 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3d7c37425ee07186c65d424306c1b295c30fa592

commit 3d7c37425ee07186c65d424306c1b295c30fa592
Author: Alan Cox 
AuthorDate: 2023-07-28 20:13:13 +
Commit: Alan Cox 
CommitDate: 2023-07-28 20:13:13 +

amd64 pmap: Catch up with pctrie changes

Recent changes to the pctrie code make it necessary to initialize the
kernel pmap's rangeset for PKU.
---
 sys/amd64/amd64/pmap.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index a4b8c6dc4c06..c1968fc11844 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1995,6 +1995,10 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
kernel_pmap->pm_stats.resident_count = res;
vm_radix_init(&kernel_pmap->pm_root);
kernel_pmap->pm_flags = pmap_flags;
+   if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
+   rangeset_init(&kernel_pmap->pm_pkru, pkru_dup_range,
+   pkru_free_range, kernel_pmap, M_NOWAIT);
+   }
 
/*
 * The kernel pmap is always active on all CPUs.  Once CPUs are



Re: git: 50d663b14b31 - main - vm: Fix vm_map_find_min()

2023-07-30 Thread Alan Cox
I see.  That change fixed the case where the address hint is non-zero, 
e.g., 0x10, but not the case where the hint is zero.


On 7/30/23 05:58, Dmitry Chagin wrote:

On Sun, Jul 30, 2023 at 01:30:37PM +0300, Dmitry Chagin wrote:

On Wed, Jul 26, 2023 at 05:25:37AM +, Alan Cox wrote:

The branch main has been updated by alc:

URL: 
https://urldefense.com/v3/__https://cgit.FreeBSD.org/src/commit/?id=50d663b14b310d6020b4b6cc92d4fae985f086f2__;!!BuQPrrmRaQ!mJGmkdRJ06TT6ocFnVe7lPS7hSNIIhrCQH9IKMbB6XZVJuiUc2_wPJ55o1zzD6AhClmQwgQKHvKnW4rs75yVrtax$

commit 50d663b14b310d6020b4b6cc92d4fae985f086f2
Author: Alan Cox 
AuthorDate: 2023-07-25 07:24:19 +
Commit: Alan Cox 
CommitDate: 2023-07-26 05:24:50 +

 vm: Fix vm_map_find_min()
 
 Fix the handling of address hints that are less than min_addr by

 vm_map_find_min().
 

Thank you for fixing that; however, it still fails under the Linuxulator.


#include <sys/mman.h>
#include <sys/stat.h>

#include <assert.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char** argv)
{
struct stat sb;
void *s32;
int f, r;

f = open(argv[0], O_RDONLY);
assert(f > 0);

r = fstat(f, &sb);
assert(r == 0);

s32  = mmap(NULL, sb.st_size, PROT_READ,
MAP_32BIT|MAP_PRIVATE, f, 0);
assert(s32 != MAP_FAILED);
assert((uintptr_t)s32 < 0x80000000);

close(f);
munmap(s32, sb.st_size);
return (0);
}


Hmm, it also fails natively with ASLR disabled.





 Reported by:dchagin
 Reviewed by:kib
 Fixes:  d8e6f4946cec0 "vm: Fix anonymous memory clustering under ASLR"
 Differential Revision:  
https://urldefense.com/v3/__https://reviews.freebsd.org/D41159__;!!BuQPrrmRaQ!mJGmkdRJ06TT6ocFnVe7lPS7hSNIIhrCQH9IKMbB6XZVJuiUc2_wPJ55o1zzD6AhClmQwgQKHvKnW4rs70ygLqzX$
---
  sys/vm/vm_map.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 444e09986d4e..eb607d519247 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2255,10 +2255,10 @@ vm_map_find_min(vm_map_t map, vm_object_t object, 
vm_ooffset_t offset,
int rv;
  
  	hint = *addr;

-   if (hint == 0)
+   if (hint == 0) {
cow |= MAP_NO_HINT;
-   if (hint < min_addr)
*addr = hint = min_addr;
+   }
for (;;) {
rv = vm_map_find(map, object, offset, addr, length, max_addr,
find_space, prot, max, cow);




git: 37e5d49e1e5e - main - vm: Fix address hints of 0 with MAP_32BIT

2023-08-12 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=37e5d49e1e5e750bf2a200ef2e117d14c4e9a578

commit 37e5d49e1e5e750bf2a200ef2e117d14c4e9a578
Author: Alan Cox 
AuthorDate: 2023-08-03 07:07:14 +
Commit: Alan Cox 
CommitDate: 2023-08-12 07:35:21 +

vm: Fix address hints of 0 with MAP_32BIT

Also, rename min_addr to default_addr, which better reflects what it
represents.  Unlike max_addr, which really is the maximum address that can
be allocated, min_addr is not a minimum address: for example, a mapping at
a non-zero hint below min_addr can still be allocated.

Reported by:dchagin
Reviewed by:dchagin, kib, markj
Fixes:  d8e6f4946cec0 "vm: Fix anonymous memory clustering under ASLR"
Differential Revision:  https://reviews.freebsd.org/D41397
---
 sys/vm/vm_map.c  | 16 
 sys/vm/vm_mmap.c | 14 ++
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 8d98af7709cd..c77c00b8b5c6 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2255,19 +2255,19 @@ done:
 
 /*
  * vm_map_find_min() is a variant of vm_map_find() that takes an
- * additional parameter (min_addr) and treats the given address
- * (*addr) differently.  Specifically, it treats *addr as a hint
+ * additional parameter ("default_addr") and treats the given address
+ * ("*addr") differently.  Specifically, it treats "*addr" as a hint
  * and not as the minimum address where the mapping is created.
  *
  * This function works in two phases.  First, it tries to
  * allocate above the hint.  If that fails and the hint is
- * greater than min_addr, it performs a second pass, replacing
- * the hint with min_addr as the minimum address for the
+ * greater than "default_addr", it performs a second pass, replacing
+ * the hint with "default_addr" as the minimum address for the
  * allocation.
  */
 int
 vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
-vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr,
+vm_offset_t *addr, vm_size_t length, vm_offset_t default_addr,
 vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max,
 int cow)
 {
@@ -2277,14 +2277,14 @@ vm_map_find_min(vm_map_t map, vm_object_t object, 
vm_ooffset_t offset,
hint = *addr;
if (hint == 0) {
cow |= MAP_NO_HINT;
-   *addr = hint = min_addr;
+   *addr = hint = default_addr;
}
for (;;) {
rv = vm_map_find(map, object, offset, addr, length, max_addr,
find_space, prot, max, cow);
-   if (rv == KERN_SUCCESS || min_addr >= hint)
+   if (rv == KERN_SUCCESS || default_addr >= hint)
return (rv);
-   *addr = hint = min_addr;
+   *addr = hint = default_addr;
}
 }
 
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 7876a055ca91..d904c4f38e40 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1555,7 +1555,7 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t 
size, vm_prot_t prot,
 vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
 boolean_t writecounted, struct thread *td)
 {
-   vm_offset_t max_addr;
+   vm_offset_t default_addr, max_addr;
int docow, error, findspace, rv;
bool curmap, fitit;
 
@@ -1630,10 +1630,16 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, 
vm_size_t size, vm_prot_t prot,
max_addr = MAP_32BIT_MAX_ADDR;
 #endif
if (curmap) {
-   rv = vm_map_find_min(map, object, foff, addr, size,
+   default_addr =
round_page((vm_offset_t)td->td_proc->p_vmspace->
-   vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
-   findspace, prot, maxprot, docow);
+   vm_daddr + lim_max(td, RLIMIT_DATA));
+#ifdef MAP_32BIT
+   if ((flags & MAP_32BIT) != 0)
+   default_addr = 0;
+#endif
+   rv = vm_map_find_min(map, object, foff, addr, size,
+   default_addr, max_addr, findspace, prot, maxprot,
+   docow);
} else {
rv = vm_map_find(map, object, foff, addr, size,
max_addr, findspace, prot, maxprot, docow);
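
This is the case that the test program in the earlier thread exercises: with
a NULL hint and MAP_32BIT, the default hint is now 0, so the allocation
searches the whole 32-bit window instead of starting above the (typically
much higher) data-segment based default.  An illustrative userland check,
assuming an amd64 system and the same headers as that test program:

    void *p;

    p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
        MAP_ANON | MAP_PRIVATE | MAP_32BIT, -1, 0);
    assert(p != MAP_FAILED);
    assert((uintptr_t)p < 0x80000000);  /* below MAP_32BIT_MAX_ADDR */
    munmap(p, getpagesize());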



git: 902ed64fecbe - main - i386 pmap: Adapt recent amd64/arm64 superpage improvements

2023-09-26 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=902ed64fecbe078e1cdd527b97af3958b413da11

commit 902ed64fecbe078e1cdd527b97af3958b413da11
Author: Alan Cox 
AuthorDate: 2023-09-24 18:21:36 +
Commit: Alan Cox 
CommitDate: 2023-09-26 17:41:20 +

i386 pmap: Adapt recent amd64/arm64 superpage improvements

Don't recompute mpte during promotion.

Optimize MADV_WILLNEED on existing superpages.

Standardize promotion conditions across amd64, arm64, and i386.

Stop requiring the accessed bit for superpage promotion.

Tidy up pmap_promote_pde() calls.

Retire PMAP_INLINE.  It's no longer used.

Note: Some of these changes are a prerequisite to fixing a panic that
arises when attempting to create a wired superpage mapping by
pmap_enter(psind=1) (as opposed to promotion).

Reviewed by:kib
MFC after:  1 week
Differential Revision:  https://reviews.freebsd.org/D41944
---
 sys/i386/i386/pmap.c| 200 
 sys/i386/include/pmap.h |   2 +-
 2 files changed, 137 insertions(+), 65 deletions(-)

diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 4198849b1a5a..2d19fc51dd53 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -159,16 +159,6 @@
 #endif
 #include 
 
-#if !defined(DIAGNOSTIC)
-#ifdef __GNUC_GNU_INLINE__
-#define PMAP_INLINE__attribute__((__gnu_inline__)) inline
-#else
-#define PMAP_INLINEextern inline
-#endif
-#else
-#define PMAP_INLINE
-#endif
-
 #ifdef PV_STATS
 #define PV_STAT(x) do { x ; } while (0)
 #else
@@ -311,13 +301,14 @@ static intpmap_pvh_wired_mappings(struct md_page 
*pvh, int count);
 
 static voidpmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
 static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
-static boolpmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
+static int pmap_enter_4mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot);
 static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
u_int flags, vm_page_t m);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
 vm_page_t m, vm_prot_t prot, vm_page_t mpte);
-static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool allpte_PG_A_set);
 static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
@@ -327,7 +318,8 @@ static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, 
int mode);
 static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
 static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 #if VM_NRESERVLEVEL > 0
-static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static bool pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+vm_page_t mpte);
 #endif
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t 
sva,
 vm_prot_t prot);
@@ -993,7 +985,7 @@ __CONCAT(PMTYPE, init)(void)
 */
if (pseflag != 0 &&
KERNBASE <= i << PDRSHIFT && i << PDRSHIFT < KERNend &&
-   pmap_insert_pt_page(kernel_pmap, mpte, true))
+   pmap_insert_pt_page(kernel_pmap, mpte, true, true))
panic("pmap_init: pmap_insert_pt_page failed");
}
PMAP_UNLOCK(kernel_pmap);
@@ -1928,14 +1920,26 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist 
*free,
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  *
- * If "promoted" is false, then the page table page "mpte" must be zero filled.
+ * If "promoted" is false, then the page table page "mpte" must be zero filled;
+ * "mpte"'s valid field will be set to 0.
+ *
+ * If "promoted" is true and "allpte_PG_A_set" is false, then "mpte" must
+ * contain valid mappings with identical attributes except for PG_A; "mpte"'s
+ * valid field will be set to 1.
+ *
+ * If "promoted" and "allpte_PG_A_set" are both true, then "mpte" must contain
+ * valid mappings with identical attributes including PG_A; "mpte"'s valid
+ * field will be set to VM_PAGE_BITS_ALL.
  */
 static __inline int
-pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
+pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
+bool allpte_PG_A_set)
 {
 
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-   mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
+  
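
The hunk is truncated right at the replacement for the "mpte->valid"
assignment.  Going by the updated comment above, the new logic presumably
distinguishes the three cases along these lines (a sketch, not necessarily
the exact committed text):

    mpte->valid = promoted ? (allpte_PG_A_set ? VM_PAGE_BITS_ALL : 1) : 0;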

git: 2001bef84ba6 - main - vm: Eliminate unnecessary lock asserts

2024-10-27 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=2001bef84ba64cee51abf91b5ad3aca071e75788

commit 2001bef84ba64cee51abf91b5ad3aca071e75788
Author: Alan Cox 
AuthorDate: 2024-10-27 17:40:43 +
Commit: Alan Cox 
CommitDate: 2024-10-27 19:03:52 +

vm: Eliminate unnecessary lock asserts

There is no actual need for the VM object to be locked when initializing
a VM page iterator.

Reviewed by:dougm
Differential Revision:  https://reviews.freebsd.org/D47298
---
 sys/vm/vm_page.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 054832e3f19a..57e5684b3178 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1715,7 +1715,6 @@ void
 vm_page_iter_init(struct pctrie_iter *pages, vm_object_t object)
 {
 
-   VM_OBJECT_ASSERT_LOCKED(object);
vm_radix_iter_init(pages, &object->rtree);
 }
 
@@ -1729,7 +1728,6 @@ vm_page_iter_limit_init(struct pctrie_iter *pages, 
vm_object_t object,
 vm_pindex_t limit)
 {
 
-   VM_OBJECT_ASSERT_LOCKED(object);
vm_radix_iter_limit_init(pages, &object->rtree, limit);
 }
 



git: fd630ae93634 - main - vm: Retire an unused declaration

2024-11-27 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=fd630ae93634b3c7410a390c57408685caf8d937

commit fd630ae93634b3c7410a390c57408685caf8d937
Author: Alan Cox 
AuthorDate: 2024-11-24 19:23:48 +
Commit: Alan Cox 
CommitDate: 2024-11-27 08:14:58 +

vm: Retire an unused declaration

The bio_transient_map was long ago replaced by a vmem arena.

Reviewed by:kib, markj
Differential Revision:  https://reviews.freebsd.org/D47729
---
 sys/sys/bio.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index 1de841681710..74d2b03bd180 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -75,7 +75,6 @@
 #ifdef _KERNEL
 struct disk;
 struct bio;
-struct vm_map;
 
 typedef void bio_task_t(void *);
 
@@ -144,7 +143,6 @@ struct bio_queue_head {
int batched;
 };
 
-extern struct vm_map *bio_transient_map;
 extern int bio_transient_maxcnt;
 
 void biodone(struct bio *bp);



git: c296ac7e0f1c - main - vm: Optimize page rename

2024-11-30 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=c296ac7e0f1c2fc9bc8bcab0177afb123ce6993a

commit c296ac7e0f1c2fc9bc8bcab0177afb123ce6993a
Author: Alan Cox 
AuthorDate: 2024-11-27 08:32:07 +
Commit: Alan Cox 
CommitDate: 2024-11-30 08:59:15 +

vm: Optimize page rename

Rename vm_page_rename() to vm_page_iter_rename() to reflect its
reimplementation using iterators, and pass the page to this function
rather than spending clock cycles looking it up.  Change its return
value from 0/1 to a bool.

Reviewed by:dougm, markj
Differential Revision:  https://reviews.freebsd.org/D47829
---
 sys/vm/vm_object.c | 10 ++
 sys/vm/vm_page.c   | 28 
 sys/vm/vm_page.h   |  3 ++-
 sys/vm/vm_reserv.c |  4 ++--
 4 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 84981d7cc7cd..ff95469749b7 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1604,8 +1604,9 @@ retry:
continue;
}
 
-   /* vm_page_rename() will dirty the page. */
-   if (vm_page_rename(&pages, new_object, m->pindex - 
offidxstart)) {
+   /* vm_page_iter_rename() will dirty the page. */
+   if (!vm_page_iter_rename(&pages, m, new_object, m->pindex -
+   offidxstart)) {
vm_page_xunbusy(m);
VM_OBJECT_WUNLOCK(new_object);
VM_OBJECT_WUNLOCK(orig_object);
@@ -1789,9 +1790,10 @@ vm_object_collapse_scan(vm_object_t object)
 * backing object to the main object.
 *
 * If the page was mapped to a process, it can remain mapped
-* through the rename.  vm_page_rename() will dirty the page.
+* through the rename.  vm_page_iter_rename() will dirty the
+* page.
 */
-   if (vm_page_rename(&pages, object, new_pindex)) {
+   if (!vm_page_iter_rename(&pages, p, object, new_pindex)) {
vm_page_xunbusy(p);
next = vm_object_collapse_scan_wait(&pages, object,
NULL);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index a37619c7743e..8a23f900e987 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2065,10 +2065,14 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, 
vm_pindex_t pindex,
 }
 
 /*
- * vm_page_rename:
+ * vm_page_iter_rename:
  *
- * Move the current page, as identified by iterator, from its current
- * object to the specified target object/offset.
+ * Tries to move the specified page from its current object to a new object
+ * and pindex, using the given iterator to remove the page from its current
+ * object.  Returns true if the move was successful, and false if the move
+ * was aborted due to a failed memory allocation.
+ *
+ * Panics if a page already resides in the new object at the new pindex.
  *
  * Note: swap associated with the page must be invalidated by the move.  We
  *   have to do this for several reasons:  (1) we aren't freeing the
@@ -2082,18 +2086,18 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, 
vm_pindex_t pindex,
  *
  * The objects must be locked.
  */
-int
-vm_page_rename(struct pctrie_iter *pages,
+bool
+vm_page_iter_rename(struct pctrie_iter *old_pages, vm_page_t m,
 vm_object_t new_object, vm_pindex_t new_pindex)
 {
-   vm_page_t m, mpred;
+   vm_page_t mpred;
vm_pindex_t opidx;
 
+   KASSERT((m->ref_count & VPRC_OBJREF) != 0,
+   ("%s: page %p is missing object ref", __func__, m));
+   VM_OBJECT_ASSERT_WLOCKED(m->object);
VM_OBJECT_ASSERT_WLOCKED(new_object);
 
-   m = vm_radix_iter_page(pages);
-   KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m));
-
/*
 * Create a custom version of vm_page_insert() which does not depend
 * by m_prev and can cheat on the implementation aspects of the
@@ -2103,7 +2107,7 @@ vm_page_rename(struct pctrie_iter *pages,
m->pindex = new_pindex;
if (vm_radix_insert_lookup_lt(&new_object->rtree, m, &mpred) != 0) {
m->pindex = opidx;
-   return (1);
+   return (false);
}
 
/*
@@ -2111,7 +2115,7 @@ vm_page_rename(struct pctrie_iter *pages,
 * the listq iterator is tainted.
 */
m->pindex = opidx;
-   vm_radix_iter_remove(pages);
+   vm_radix_iter_remove(old_pages);
vm_page_remove_radixdone(m);
 
/* Return back to the new pindex to complete vm_page_insert(). */
@@ -2121,7 +2125,7 @@ vm_page_rename(struct pctrie_iter *pages,
vm_page_insert_radixdone(m, new_object, mpred);
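
Note that the sense of the return value is inverted relative to the old
interface: vm_page_rename() returned 0 on success and nonzero on failure,
while vm_page_iter_rename() returns true on success.  Callers adjust
accordingly, as in the vm_object.c hunks above; schematically:

    /* Before: nonzero meant the radix insertion failed. */
    if (vm_page_rename(&pages, new_object, new_pindex) != 0)
        goto backoff;

    /* After: false means the radix insertion failed. */
    if (!vm_page_iter_rename(&pages, m, new_object, new_pindex))
        goto backoff;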
  

git: 8c8d36b9d172 - main - vm: static-ize vm_page_alloc_after()

2024-11-17 Thread Alan Cox
The branch main has been updated by alc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=8c8d36b9d17239dc4e54731b6cf54c9f9fce43a9

commit 8c8d36b9d17239dc4e54731b6cf54c9f9fce43a9
Author: Alan Cox 
AuthorDate: 2024-11-16 22:20:14 +
Commit: Alan Cox 
CommitDate: 2024-11-17 18:19:00 +

vm: static-ize vm_page_alloc_after()

This function is only intended for the internal use of the VM system.

Reviewed by:dougm, kib, markj
Differential Revision:  https://reviews.freebsd.org/D47644
---
 sys/vm/vm_page.c | 4 +++-
 sys/vm/vm_page.h | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 6b49f0745c73..0b9b55337b52 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -162,6 +162,8 @@ SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | 
CTLFLAG_RD |
 
 static uma_zone_t fakepg_zone;
 
+static vm_page_t vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
+int req, vm_page_t mpred);
 static void vm_page_alloc_check(vm_page_t m);
 static vm_page_t vm_page_alloc_nofree_domain(int domain, int req);
 static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m,
@@ -2085,7 +2087,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int 
req)
  * the resident page in the object with largest index smaller than the given
  * page index, or NULL if no such page exists.
  */
-vm_page_t
+static vm_page_t
 vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
 int req, vm_page_t mpred)
 {
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index b85342b784de..893608bcacf1 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -608,7 +608,6 @@ void vm_page_activate (vm_page_t);
 void vm_page_advise(vm_page_t m, int advice);
 vm_page_t vm_page_mpred(vm_object_t, vm_pindex_t);
 vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int);
-vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t);
 vm_page_t vm_page_alloc_domain_after(vm_object_t, vm_pindex_t, int, int,
 vm_page_t);
 vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,