git: 82f5dfc12139 - main - db_pprint: Fix offset calculation for struct members
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=82f5dfc121391604b079ea96aa14ea71e6b618c9 commit 82f5dfc121391604b079ea96aa14ea71e6b618c9 Author: Bojan Novković AuthorDate: 2024-07-21 16:45:33 +0000 Commit: Bojan Novković CommitDate: 2024-07-21 17:31:48 +0000 db_pprint: Fix offset calculation for struct members The struct pretty-printing code uses the ctm_offset field in struct ctf_member_v3 to calculate the address of a struct member. However, the code treats this as a byte offset rather than the offset in bits, leading to wrong values being printed. Fix this by dividing ctm_offset by NBBY. Approved by: markj (mentor) Fixes: c21bc6f3c242 --- sys/ddb/db_pprint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/ddb/db_pprint.c b/sys/ddb/db_pprint.c index 10334ce650c8..2925caedd49d 100644 --- a/sys/ddb/db_pprint.c +++ b/sys/ddb/db_pprint.c @@ -117,7 +117,7 @@ db_pprint_struct(db_addr_t addr, struct ctf_type_v3 *type, u_int depth) return; } mtype = db_ctf_typeid_to_type(&sym_data, mp->ctm_type); - maddr = addr + mp->ctm_offset; + maddr = addr + (mp->ctm_offset / NBBY); mname = db_ctf_stroff_to_str(&sym_data, mp->ctm_name); db_indent = depth; if (mname != NULL) { @@ -140,7 +140,7 @@ db_pprint_struct(db_addr_t addr, struct ctf_type_v3 *type, u_int depth) return; } mtype = db_ctf_typeid_to_type(&sym_data, mp->ctlm_type); - maddr = addr + CTF_LMEM_OFFSET(mp); + maddr = addr + (CTF_LMEM_OFFSET(mp) / NBBY); mname = db_ctf_stroff_to_str(&sym_data, mp->ctlm_name); db_indent = depth; if (mname != NULL) {
git: 1cbd613f3343 - main - db_pprint: Properly handle complex pointer types
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=1cbd613f3343c873ace8a56df2e515626a18ef22 commit 1cbd613f3343c873ace8a56df2e515626a18ef22 Author: Bojan Novković AuthorDate: 2024-07-21 16:51:22 + Commit: Bojan Novković CommitDate: 2024-07-21 17:31:59 + db_pprint: Properly handle complex pointer types The existing pretty-printing code fails to properly print complex pointer types. This commit fixes this behaviour by traversing the chain of CTF types until a base type is encountered. Approved by: markj (mentor) Fixes: c21bc6f3c242 --- sys/ddb/db_pprint.c | 38 ++ 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/sys/ddb/db_pprint.c b/sys/ddb/db_pprint.c index 2925caedd49d..0ca2b0bb952c 100644 --- a/sys/ddb/db_pprint.c +++ b/sys/ddb/db_pprint.c @@ -45,6 +45,7 @@ static void db_pprint_type(db_addr_t addr, struct ctf_type_v3 *type, static u_int max_depth = DB_PPRINT_DEFAULT_DEPTH; static struct db_ctf_sym_data sym_data; +static const char *asteriskstr = "*"; /* * Pretty-prints a CTF_INT type. 
@@ -248,9 +249,14 @@ db_pprint_ptr(db_addr_t addr, struct ctf_type_v3 *type, u_int depth) const char *qual = ""; const char *name; db_addr_t val; + uint32_t tid; u_int kind; + int ptrcnt; - ref_type = db_ctf_typeid_to_type(&sym_data, type->ctt_type); + ptrcnt = 1; + tid = type->ctt_type; +again: + ref_type = db_ctf_typeid_to_type(&sym_data, tid); kind = CTF_V3_INFO_KIND(ref_type->ctt_info); switch (kind) { case CTF_K_STRUCT: @@ -258,25 +264,41 @@ db_pprint_ptr(db_addr_t addr, struct ctf_type_v3 *type, u_int depth) break; case CTF_K_VOLATILE: qual = "volatile "; - break; + tid = ref_type->ctt_type; + goto again; case CTF_K_CONST: qual = "const "; - break; + tid = ref_type->ctt_type; + goto again; + case CTF_K_RESTRICT: + qual = "restrict "; + tid = ref_type->ctt_type; + goto again; + case CTF_K_POINTER: + ptrcnt++; + tid = ref_type->ctt_type; + goto again; + case CTF_K_TYPEDEF: + tid = ref_type->ctt_type; + goto again; default: break; } - val = db_get_value(addr, sizeof(db_addr_t), false); - if (depth < max_depth) { + ptrcnt = min(ptrcnt, strlen(asteriskstr)); + val = (addr != 0) ? db_get_value(addr, sizeof(db_addr_t), false) : 0; + if (depth < max_depth && (val != 0)) { /* Print contents of memory pointed to by this pointer. */ - db_pprint_type(addr, ref_type, depth + 1); + db_pprint_type(val, ref_type, depth + 1); } else { name = db_ctf_stroff_to_str(&sym_data, ref_type->ctt_name); db_indent = depth; if (name != NULL) - db_printf("(%s%s *) 0x%lx", qual, name, (long)val); + db_printf("(%s%s %.*s) 0x%lx", qual, name, ptrcnt, + asteriskstr, (long)val); else - db_printf("(%s *) 0x%lx", qual, (long)val); + db_printf("(%s %.*s) 0x%lx", qual, ptrcnt, asteriskstr, + (long)val); } }
git: 78f3e0f6b3ad - main - malloc(9): Introduce M_NEVERFREED
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=78f3e0f6b3ad70d9574730fc3338474376ef8ebd commit 78f3e0f6b3ad70d9574730fc3338474376ef8ebd Author: Bojan Novković AuthorDate: 2024-05-03 18:18:56 +0000 Commit: Bojan Novković CommitDate: 2024-07-30 15:38:18 +0000 malloc(9): Introduce M_NEVERFREED This patch adds an additional malloc(9) flag to distinguish allocations that are never freed during runtime. Differential Revision: https://reviews.freebsd.org/D45045 Reviewed by: alc, kib, markj Tested by: alc --- sys/sys/malloc.h | 3 ++- sys/vm/uma_core.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h index dfd7928fc258..9b281da4b4d4 100644 --- a/sys/sys/malloc.h +++ b/sys/sys/malloc.h @@ -60,8 +60,9 @@ #defineM_BESTFIT 0x2000 /* only for vmem, low fragmentation */ #defineM_EXEC 0x4000 /* allocate executable space */ #defineM_NEXTFIT 0x8000 /* only for vmem, follow cursor */ +#defineM_NEVERFREED 0x10000 /* chunk will never get freed */ -#defineM_VERSION 2020110501 +#defineM_VERSION 2024073001 /* * Two malloc type structures are present: malloc_type, which is used by a diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 516ac2c2965a..e93c561d759a 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -1791,6 +1791,9 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags, if (keg->uk_flags & UMA_ZONE_NODUMP) aflags |= M_NODUMP; + if (keg->uk_flags & UMA_ZONE_NOFREE) + aflags |= M_NEVERFREED; + /* zone is passed for legacy reasons. */ size = keg->uk_ppera * PAGE_SIZE; mem = keg->uk_allocf(zone, size, domain, &sflags, aflags);
git: 92b9138991dd - main - vm: Introduce VM_ALLOC_NOFREE and PG_NOFREE
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=92b9138991dd2829ac744592cb9f9f3415be146c commit 92b9138991dd2829ac744592cb9f9f3415be146c Author: Bojan Novković AuthorDate: 2024-07-14 13:13:56 + Commit: Bojan Novković CommitDate: 2024-07-30 15:38:24 + vm: Introduce VM_ALLOC_NOFREE and PG_NOFREE This patch adds two additional vm_page flags to distinguish pages that never get released while the system is running (e.g. UMA_ZONE_NOFREE slabs). Differential Revision: https://reviews.freebsd.org/D45970 Reviewed by:alc, kib, markj Tested by: alc --- sys/vm/vm_page.c | 13 ++--- sys/vm/vm_page.h | 5 - 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 64413ba10bfa..3b6b88e4eb32 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -2082,7 +2082,8 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, #defineVPA_FLAGS (VM_ALLOC_CLASS_MASK | VM_ALLOC_WAITFAIL | \ VM_ALLOC_NOWAIT | VM_ALLOC_NOBUSY |\ VM_ALLOC_SBUSY | VM_ALLOC_WIRED | \ -VM_ALLOC_NODUMP | VM_ALLOC_ZERO | VM_ALLOC_COUNT_MASK) +VM_ALLOC_NODUMP | VM_ALLOC_ZERO | \ +VM_ALLOC_NOFREE | VM_ALLOC_COUNT_MASK) KASSERT((req & ~VPA_FLAGS) == 0, ("invalid request %#x", req)); KASSERT(((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != @@ -2154,6 +2155,8 @@ found: flags |= m->flags & PG_ZERO; if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; + if ((req & VM_ALLOC_NOFREE) != 0) + flags |= PG_NOFREE; m->flags = flags; m->a.flags = 0; m->oflags = (object->flags & OBJ_UNMANAGED) != 0 ? 
VPO_UNMANAGED : 0; @@ -2418,11 +2421,13 @@ vm_page_alloc_noobj_domain(int domain, int req) #defineVPAN_FLAGS (VM_ALLOC_CLASS_MASK | VM_ALLOC_WAITFAIL | \ VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK |\ VM_ALLOC_NOBUSY | VM_ALLOC_WIRED | \ -VM_ALLOC_NODUMP | VM_ALLOC_ZERO | VM_ALLOC_COUNT_MASK) +VM_ALLOC_NODUMP | VM_ALLOC_ZERO | \ +VM_ALLOC_NOFREE | VM_ALLOC_COUNT_MASK) KASSERT((req & ~VPAN_FLAGS) == 0, ("invalid request %#x", req)); - flags = (req & VM_ALLOC_NODUMP) != 0 ? PG_NODUMP : 0; + flags = ((req & VM_ALLOC_NODUMP) != 0 ? PG_NODUMP : 0) | + ((req & VM_ALLOC_NOFREE) != 0 ? PG_NOFREE : 0); vmd = VM_DOMAIN(domain); again: if (vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone != NULL) { @@ -3937,6 +3942,8 @@ vm_page_free_prep(vm_page_t m) m, i, (uintmax_t)*p)); } #endif + KASSERT((m->flags & PG_NOFREE) == 0, + ("%s: attempting to free a PG_NOFREE page", __func__)); if ((m->oflags & VPO_UNMANAGED) == 0) { KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_prep: freeing mapped page %p", m)); diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 61a0228273c2..07a6c98c8ee8 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -457,6 +457,7 @@ extern struct mtx_padalign pa_lock[]; #definePG_ZERO 0x04/* page is zeroed */ #definePG_MARKER 0x08/* special queue marker page */ #definePG_NODUMP 0x10/* don't include this page in a dump */ +#definePG_NOFREE 0x20/* page should never be freed. */ /* * Misc constants. 
@@ -537,7 +538,7 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); #defineVM_ALLOC_WIRED 0x0020 /* (acgnp) Allocate a wired page */ #defineVM_ALLOC_ZERO 0x0040 /* (acgnp) Allocate a zeroed page */ #defineVM_ALLOC_NORECLAIM 0x0080 /* (c) Do not reclaim after failure */ -#defineVM_ALLOC_AVAIL0 0x0100 +#defineVM_ALLOC_NOFREE 0x0100 /* (an) Page will never be released */ #defineVM_ALLOC_NOBUSY 0x0200 /* (acgp) Do not excl busy the page */ #defineVM_ALLOC_NOCREAT0x0400 /* (gp) Don't create a page */ #defineVM_ALLOC_AVAIL1 0x0800 @@ -575,6 +576,8 @@ malloc2vm_flags(int malloc_flags) pflags |= VM_ALLOC_WAITOK; if ((malloc_flags & M_NORECLAIM)) pflags |= VM_ALLOC_NORECLAIM; + if ((malloc_flags & M_NEVERFREED)) + pflags |= VM_ALLOC_NOFREE; return (pflags); } #endif
git: a8693e89e3e4 - main - vm: Introduce vm_page_alloc_nofree_domain
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=a8693e89e3e4a04efd02901cc93bb6148e3e40d6 commit a8693e89e3e4a04efd02901cc93bb6148e3e40d6 Author: Bojan Novković AuthorDate: 2024-07-14 13:14:22 + Commit: Bojan Novković CommitDate: 2024-07-30 15:38:24 + vm: Introduce vm_page_alloc_nofree_domain This patch adds a reservation-aware bump allocator intended for allocating NOFREE pages. The main goal of this change is to reduce the long-term fragmentation issues caused by pages that are never freed during runtime. The `vm_page_alloc_nofree_domain` routine hands out 0-order pages from a preallocated superpage. Once an active NOFREE superpage fills up, the routine will try to allocate a new one and discard the old one. This routine will get invoked whenever VM_ALLOC_NOFREE is passed to vm_page_alloc_noobj or vm_page_alloc. Differential Revision: https://reviews.freebsd.org/D45863 Reviewed by:alc, kib, markj Tested by: alc --- sys/vm/vm_page.c | 62 +++ sys/vm/vm_pagequeue.h | 4 2 files changed, 66 insertions(+) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 3b6b88e4eb32..ff9df7f4a9fc 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -163,6 +163,7 @@ SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); +static vm_page_t vm_page_alloc_nofree_domain(int domain, int req); static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, vm_pindex_t pindex, const char *wmesg, int allocflags, bool locked); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); @@ -2099,6 +2100,11 @@ vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, if (!vm_pager_can_alloc_page(object, pindex)) return (NULL); again: + if (__predict_false((req & VM_ALLOC_NOFREE) != 0)) { + m = vm_page_alloc_nofree_domain(domain, req); + if (m != NULL) + goto found; + } #if VM_NRESERVLEVEL > 0 /* * Can we allocate the page 
from a reservation? @@ -2430,6 +2436,12 @@ vm_page_alloc_noobj_domain(int domain, int req) ((req & VM_ALLOC_NOFREE) != 0 ? PG_NOFREE : 0); vmd = VM_DOMAIN(domain); again: + if (__predict_false((req & VM_ALLOC_NOFREE) != 0)) { + m = vm_page_alloc_nofree_domain(domain, req); + if (m != NULL) + goto found; + } + if (vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone != NULL) { m = uma_zalloc(vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone, M_NOWAIT | M_NOVM); @@ -2480,6 +2492,56 @@ found: return (m); } +#if VM_NRESERVLEVEL > 1 +#defineVM_NOFREE_IMPORT_ORDER (VM_LEVEL_1_ORDER + VM_LEVEL_0_ORDER) +#elif VM_NRESERVLEVEL > 0 +#defineVM_NOFREE_IMPORT_ORDER VM_LEVEL_0_ORDER +#else +#defineVM_NOFREE_IMPORT_ORDER 8 +#endif + +/* + * Allocate a single NOFREE page. + * + * This routine hands out NOFREE pages from higher-order + * physical memory blocks in order to reduce memory fragmentation. + * When a NOFREE for a given domain chunk is used up, + * the routine will try to fetch a new one from the freelists + * and discard the old one. 
+ */ +static vm_page_t +vm_page_alloc_nofree_domain(int domain, int req) +{ + vm_page_t m; + struct vm_domain *vmd; + struct vm_nofreeq *nqp; + + KASSERT((req & VM_ALLOC_NOFREE) != 0, ("invalid request %#x", req)); + + vmd = VM_DOMAIN(domain); + nqp = &vmd->vmd_nofreeq; + vm_domain_free_lock(vmd); + if (nqp->offs >= (1 << VM_NOFREE_IMPORT_ORDER) || nqp->ma == NULL) { + if (!vm_domain_allocate(vmd, req, + 1 << VM_NOFREE_IMPORT_ORDER)) { + vm_domain_free_unlock(vmd); + return (NULL); + } + nqp->ma = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, + VM_LEVEL_0_ORDER); + if (nqp->ma == NULL) { + vm_domain_freecnt_inc(vmd, 1 << VM_NOFREE_IMPORT_ORDER); + vm_domain_free_unlock(vmd); + return (NULL); + } + nqp->offs = 0; + } + m = &nqp->ma[nqp->offs++]; + vm_domain_free_unlock(vmd); + + return (m); +} + vm_page_t vm_page_alloc_noobj(int req) { diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h index 7e133ec947b4..86863a0a6400 100644 --- a/sys/vm/vm_pagequeue.h +++ b/sys/vm/vm_pagequeue.h @@ -246,6 +246,10 @@ struct vm_domain { u_int vmd_domain; /* (c) Domain number. */ u_int vmd_p
git: 3f32a7e4eee5 - main - vm: Add a KVA arena for M_NEVERFREED allocations
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=3f32a7e4eee53d5565a4076e69a41d1afd803e0c commit 3f32a7e4eee53d5565a4076e69a41d1afd803e0c Author: Bojan Novković AuthorDate: 2024-07-16 14:14:30 + Commit: Bojan Novković CommitDate: 2024-07-30 15:38:24 + vm: Add a KVA arena for M_NEVERFREED allocations This patch adds a new KVA arena for separating M_NEVERFREED allocations. Separating KVAs for pages that are never freed should facilitate superpage promotion in the kernel. Differential Revision: https://reviews.freebsd.org/D45997 Reviewed by:alc, kib, markj Tested by: alc --- sys/vm/vm_kern.c | 21 - sys/vm/vm_pagequeue.h | 1 + 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index a04044463fe2..fb7c80b767ed 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -473,10 +473,12 @@ kmem_malloc_domain(int domain, vm_size_t size, int flags) vm_size_t asize; int rv; - if (__predict_true((flags & M_EXEC) == 0)) + if (__predict_true((flags & (M_EXEC | M_NEVERFREED)) == 0)) arena = vm_dom[domain].vmd_kernel_arena; - else + else if ((flags & M_EXEC) != 0) arena = vm_dom[domain].vmd_kernel_rwx_arena; + else + arena = vm_dom[domain].vmd_kernel_nofree_arena; asize = round_page(size); if (vmem_alloc(arena, asize, flags | M_BESTFIT, &addr)) return (0); @@ -882,20 +884,29 @@ kmem_init(vm_offset_t start, vm_offset_t end) /* * In architectures with superpages, maintain separate arenas * for allocations with permissions that differ from the -* "standard" read/write permissions used for kernel memory, -* so as not to inhibit superpage promotion. +* "standard" read/write permissions used for kernel memory +* and pages that are never released, so as not to inhibit +* superpage promotion. * -* Use the base import quantum since this arena is rarely used. +* Use the base import quantum since these arenas are rarely +* used. 
*/ #if VM_NRESERVLEVEL > 0 vm_dom[domain].vmd_kernel_rwx_arena = vmem_create( "kernel rwx arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK); + vm_dom[domain].vmd_kernel_nofree_arena = vmem_create( + "kernel NOFREE arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK); vmem_set_import(vm_dom[domain].vmd_kernel_rwx_arena, kva_import_domain, (vmem_release_t *)vmem_xfree, kernel_arena, KVA_QUANTUM); + vmem_set_import(vm_dom[domain].vmd_kernel_nofree_arena, + kva_import_domain, (vmem_release_t *)vmem_xfree, + kernel_arena, KVA_QUANTUM); #else vm_dom[domain].vmd_kernel_rwx_arena = vm_dom[domain].vmd_kernel_arena; + vm_dom[domain].vmd_kernel_nofree_arena = + vm_dom[domain].vmd_kernel_arena; #endif } diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h index 86863a0a6400..af1183e63e53 100644 --- a/sys/vm/vm_pagequeue.h +++ b/sys/vm/vm_pagequeue.h @@ -243,6 +243,7 @@ struct vm_domain { } vmd_pgcache[VM_NFREEPOOL]; struct vmem *vmd_kernel_arena; /* (c) per-domain kva R/W arena. */ struct vmem *vmd_kernel_rwx_arena; /* (c) per-domain kva R/W/X arena. */ + struct vmem *vmd_kernel_nofree_arena; /* (c) per-domain kva NOFREE arena. */ u_int vmd_domain; /* (c) Domain number. */ u_int vmd_page_count; /* (c) Total page count. */ long vmd_segs; /* (c) bitmask of the segments */
git: ddc09a10eaa6 - main - pmap_growkernel: Use VM_ALLOC_NOFREE when allocating pagetable pages
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=ddc09a10eaa66bbebeb691021bb2a9f934d33d58 commit ddc09a10eaa66bbebeb691021bb2a9f934d33d58 Author: Bojan Novković AuthorDate: 2024-07-16 15:12:25 + Commit: Bojan Novković CommitDate: 2024-07-30 15:38:24 + pmap_growkernel: Use VM_ALLOC_NOFREE when allocating pagetable pages This patch modifies pmap_growkernel in all pmaps to use VM_ALLOC_NOFREE when allocating new pagetable pages. This should help reduce longterm fragmentation as these pages are never released after they are allocated. Differential Revision: https://reviews.freebsd.org/D45998 Reviewed by:alc, markj, kib, mhorne Tested by: alc --- sys/amd64/amd64/pmap.c | 7 --- sys/arm/arm/pmap-v6.c | 2 +- sys/arm64/arm64/pmap.c | 6 +++--- sys/i386/i386/pmap.c| 4 ++-- sys/powerpc/aim/mmu_radix.c | 6 +++--- sys/riscv/riscv/pmap.c | 6 +++--- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 778d07689ff0..cf0fc7184f56 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -5156,8 +5156,8 @@ pmap_growkernel(vm_offset_t addr) pdpe = pmap_pdpe(kernel_pmap, end); if ((*pdpe & X86_PG_V) == 0) { nkpg = pmap_alloc_pt_page(kernel_pmap, - pmap_pdpe_pindex(end), VM_ALLOC_WIRED | - VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO); + pmap_pdpe_pindex(end), VM_ALLOC_INTERRUPT | + VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); paddr = VM_PAGE_TO_PHYS(nkpg); @@ -5176,7 +5176,8 @@ pmap_growkernel(vm_offset_t addr) } nkpg = pmap_alloc_pt_page(kernel_pmap, pmap_pde_pindex(end), - VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO); + VM_ALLOC_INTERRUPT | VM_ALLOC_NOFREE | VM_ALLOC_WIRED | + VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); paddr = VM_PAGE_TO_PHYS(nkpg); diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index de1082e7ae62..6cc78b187a9a 100644 --- 
a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -2067,7 +2067,7 @@ pmap_growkernel(vm_offset_t addr) * Install new PT2s page into kernel PT2TAB. */ m = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | - VM_ALLOC_WIRED | VM_ALLOC_ZERO); + VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) panic("%s: no memory to grow kernel", __func__); m->pindex = pte1_index(kernel_vm_end) & ~PT2PG_MASK; diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 59de6ef37f09..58795e25c82e 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -2959,7 +2959,7 @@ pmap_growkernel(vm_offset_t addr) if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | - VM_ALLOC_WIRED | VM_ALLOC_ZERO); + VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); nkpg->pindex = kernel_vm_end >> L1_SHIFT; @@ -2978,8 +2978,8 @@ pmap_growkernel(vm_offset_t addr) continue; } - nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); + nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | + VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); nkpg->pindex = kernel_vm_end >> L2_SHIFT; diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 5808c31a99af..57ba48d399c3 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -2264,8 +2264,8 @@ __CONCAT(PMTYPE, growkernel)(vm_offset_t addr) continue; } - nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); + nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | + VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg ==
git: 1206cf04a717 - main - sys: Bump __FreeBSD_version to mark new malloc(9) flags
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=1206cf04a717a55bc15255ed043b066941cb43f2 commit 1206cf04a717a55bc15255ed043b066941cb43f2 Author: Bojan Novković AuthorDate: 2024-07-30 14:41:22 + Commit: Bojan Novković CommitDate: 2024-07-30 15:38:25 + sys: Bump __FreeBSD_version to mark new malloc(9) flags This bump is meant to cover the addition of the M_NEVERFREED flag. --- sys/sys/param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/sys/param.h b/sys/sys/param.h index 7461f7dec403..79d188c93230 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -73,7 +73,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1500022 +#define __FreeBSD_version 1500023 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
git: d9ce4c0b6617 - main - vm_page: Fix inconsistent use of VM_NOFREE_IMPORT_ORDER in vm_page_alloc_nofree_domain
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=d9ce4c0b66170383a558b90ca835d31ee6d87927 commit d9ce4c0b66170383a558b90ca835d31ee6d87927 Author: Bojan Novković AuthorDate: 2024-07-30 17:17:18 + Commit: Bojan Novković CommitDate: 2024-07-30 17:25:37 + vm_page: Fix inconsistent use of VM_NOFREE_IMPORT_ORDER in vm_page_alloc_nofree_domain Pass VM_NOFREE_IMPORT_ORDER to vm_phys_alloc_pages instead of VM_LEVEL_0_ORDER when allocating a higher-order page for the NOFREE page allocator. Reported by:alc Fixes: a8693e8 --- sys/vm/vm_page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index ff9df7f4a9fc..0f41ea5a6bb5 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -2528,7 +2528,7 @@ vm_page_alloc_nofree_domain(int domain, int req) return (NULL); } nqp->ma = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, - VM_LEVEL_0_ORDER); + VM_NOFREE_IMPORT_ORDER); if (nqp->ma == NULL) { vm_domain_freecnt_inc(vmd, 1 << VM_NOFREE_IMPORT_ORDER); vm_domain_free_unlock(vmd);
git: 31cc65708c66 - main - man9: Document M_NEVERFREED and VM_ALLOC_NOFREE
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=31cc65708c664c2a0257c26503d39ebc506f674e commit 31cc65708c664c2a0257c26503d39ebc506f674e Author: Bojan Novković AuthorDate: 2024-07-31 17:36:55 + Commit: Bojan Novković CommitDate: 2024-08-05 16:41:22 + man9: Document M_NEVERFREED and VM_ALLOC_NOFREE Reviewed by:alc, kib, markj Differential Revision: https://reviews.freebsd.org/D46198 --- share/man/man9/malloc.9| 11 ++- share/man/man9/vm_page_alloc.9 | 6 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/share/man/man9/malloc.9 b/share/man/man9/malloc.9 index 0ab5b2ed6e8d..d8759a255492 100644 --- a/share/man/man9/malloc.9 +++ b/share/man/man9/malloc.9 @@ -28,7 +28,7 @@ .\" .\" $NetBSD: malloc.9,v 1.3 1996/11/11 00:05:11 lukem Exp $ .\" -.Dd October 12, 2022 +.Dd August 4, 2024 .Dt MALLOC 9 .Os .Sh NAME @@ -269,6 +269,15 @@ This option should only be used in combination with .Dv M_NOWAIT when an allocation failure cannot be tolerated by the caller without catastrophic effects on the system. +.It Dv M_NEVERFREED +This is an internal flag used by the +.Xr UMA 9 +allocator and should not be used in regular +.Fn malloc +invocations. +See the description of VM_ALLOC_NOFREE in +.Xr vm_page_alloc 9 +for more details. .El .Pp Exactly one of either diff --git a/share/man/man9/vm_page_alloc.9 b/share/man/man9/vm_page_alloc.9 index 7d6cf1692bb1..4bf8db33a28d 100644 --- a/share/man/man9/vm_page_alloc.9 +++ b/share/man/man9/vm_page_alloc.9 @@ -28,7 +28,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH .\" DAMAGE. .\" -.Dd July 21, 2024 +.Dd August 4, 2024 .Dt VM_PAGE_ALLOC 9 .Os .Sh NAME @@ -307,6 +307,10 @@ pages will be allocated by the caller in the near future. must be no larger than 65535. If the system is short of free pages, this hint may cause the kernel to reclaim memory more aggressively than it would otherwise. 
+.It Dv VM_ALLOC_NOFREE +The caller asserts that the returned page will never be released. +If this flag is specified, the allocator will try to fetch a page from a +special per-domain arena in order to curb long-term physical memory fragmentation. .El .Sh RETURN VALUES If the allocation was successful, a pointer to the
git: 06134ea2f38c - main - malloc(9): Check for M_NEVERFREED
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=06134ea2f38ca214b53a1613e110e8332b2804e4 commit 06134ea2f38ca214b53a1613e110e8332b2804e4 Author: Bojan Novković AuthorDate: 2024-07-31 17:43:31 + Commit: Bojan Novković CommitDate: 2024-08-05 16:44:10 + malloc(9): Check for M_NEVERFREED The recently introduced M_NEVERFREED flag is not meant to be used for regular malloc requests. Enforce this by checking for M_NEVERFREED in malloc_dbg. Reviewed by:alc, kib, markj Differential Revision: https://reviews.freebsd.org/D46199 --- sys/kern/kern_malloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index 3c4cb63003c4..9d7e0464e0f7 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -542,6 +542,8 @@ malloc_dbg(caddr_t *vap, size_t *sizep, struct malloc_type *mtp, once++; } } + KASSERT((flags & M_NEVERFREED) == 0, + ("malloc: M_NEVERFREED is for internal use only")); #endif #ifdef MALLOC_MAKE_FAILURES if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) {
git: da76d349b6b1 - main - uma: Deduplicate uma_small_alloc
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=da76d349b6b104f4e70562304c800a0793dea18d commit da76d349b6b104f4e70562304c800a0793dea18d Author: Bojan Novković AuthorDate: 2024-05-03 16:48:18 + Commit: Bojan Novković CommitDate: 2024-05-25 17:24:46 + uma: Deduplicate uma_small_alloc This commit refactors the UMA small alloc code and removes most UMA machine-dependent code. The existing machine-dependent uma_small_alloc code is almost identical across all architectures, except for powerpc where using the direct map addresses involved extra steps in some cases. The MI/MD split was replaced by a default uma_small_alloc implementation that can be overridden by architecture-specific code by defining the UMA_MD_SMALL_ALLOC symbol. Furthermore, UMA_USE_DMAP was introduced to replace most UMA_MD_SMALL_ALLOC uses. Reviewed by: markj, kib Approved by: markj (mentor) Differential Revision: https://reviews.freebsd.org/D45084 --- sys/amd64/amd64/uma_machdep.c | 71 --- sys/amd64/include/vmparam.h | 6 ++-- sys/arm64/arm64/uma_machdep.c | 69 - sys/arm64/include/vmparam.h | 2 +- sys/conf/files.amd64 | 1 - sys/conf/files.arm64 | 1 - sys/conf/files.riscv | 1 - sys/kern/subr_vmem.c | 6 ++-- sys/powerpc/include/vmparam.h | 6 ++-- sys/riscv/include/vmparam.h | 2 +- sys/riscv/riscv/uma_machdep.c | 68 - sys/vm/uma_core.c | 43 -- sys/vm/vm_map.c | 8 ++--- sys/vm/vm_radix.c | 2 +- 14 files changed, 57 insertions(+), 229 deletions(-) diff --git a/sys/amd64/amd64/uma_machdep.c b/sys/amd64/amd64/uma_machdep.c deleted file mode 100644 index f83f0674cc4e.. --- a/sys/amd64/amd64/uma_machdep.c +++ /dev/null @@ -1,71 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause - * - * Copyright (c) 2003 Alan L. Cox - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - *notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - *notice, this list of conditions and the following disclaimer in the - *documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -void * -uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags, -int wait) -{ - vm_page_t m; - vm_paddr_t pa; - void *va; - - *flags = UMA_SLAB_PRIV; - m = vm_page_alloc_noobj_domain(domain, malloc2vm_flags(wait) | - VM_ALLOC_WIRED); - if (m == NULL) - return (NULL); - pa = m->phys_addr; - if ((wait & M_NODUMP) == 0) - dump_add_page(pa); - va = (void *)PHYS_TO_DMAP(pa); - return (va); -} - -void -uma_small_free(void *mem, vm_size_t size, u_int8_t flags) -{ - vm_page_t m; - vm_paddr_t pa; - - pa = DMAP_TO_PHYS((vm_offset_t)mem); - dump_drop_page(pa); - m = PHYS_TO_VM_PAGE(pa); - vm_page_unwire_noq(m); - vm_page_free(m); -} diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index bff9bf840036..e5155a7c7d47 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -72,12 +72,12 @@ #endif /* - * We provide a machine specific single page allocator through the use - * of the direct mapped segment. This uses 2MB pages for reduced + * We provide a single page allocator through the use of the + * direct mapped segment. This uses 2MB pages for reduced * TLB pressure. */ #if !defined(KASAN) && !defined(KMSAN) -#defineUMA_MD_SMALL_ALLOC +#define UMA_USE_DMAP #endif /* dif
git: 0a44b8a56d23 - main - vm: Simplify startup page dumping conditional
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=0a44b8a56d23e24b05471ddb038b7dd30b149efe commit 0a44b8a56d23e24b05471ddb038b7dd30b149efe Author: Bojan Novković AuthorDate: 2024-05-03 17:53:56 +0000 Commit: Bojan Novković CommitDate: 2024-05-25 17:24:55 +0000 vm: Simplify startup page dumping conditional This commit introduces the MINIDUMP_STARTUP_PAGE_TRACKING symbol and uses it to simplify several instances of a complex preprocessor conditional for adding pages allocated when bootstrapping the kernel to minidumps. Reviewed by: markj, mhorne Approved by: markj (mentor) Differential Revision: https://reviews.freebsd.org/D45085 --- sys/amd64/include/vmparam.h | 3 ++- sys/arm/include/vmparam.h | 1 + sys/arm64/include/vmparam.h | 1 + sys/i386/include/vmparam.h| 1 + sys/powerpc/include/vmparam.h | 2 ++ sys/riscv/include/vmparam.h | 1 + sys/vm/uma_core.c | 6 ++ sys/vm/vm_page.c | 12 +--- 8 files changed, 15 insertions(+), 12 deletions(-) diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index e5155a7c7d47..93c2648e8fac 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -294,7 +294,8 @@ /* * Need a page dump array for minidump. */ -#define MINIDUMP_PAGE_TRACKING 1 +#define MINIDUMP_PAGE_TRACKING1 +#define MINIDUMP_STARTUP_PAGE_TRACKING 1 #endif /* _MACHINE_VMPARAM_H_ */ diff --git a/sys/arm/include/vmparam.h b/sys/arm/include/vmparam.h index 4ad42cfbe16b..15807923cefb 100644 --- a/sys/arm/include/vmparam.h +++ b/sys/arm/include/vmparam.h @@ -200,5 +200,6 @@ extern vm_offset_t vm_max_kernel_address; * Need a page dump array for minidump. 
*/ #define MINIDUMP_PAGE_TRACKING 1 +#define MINIDUMP_STARTUP_PAGE_TRACKING 0 #endif /* _MACHINE_VMPARAM_H_ */ diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h index 0dcd02d63938..0e93e4026d4a 100644 --- a/sys/arm64/include/vmparam.h +++ b/sys/arm64/include/vmparam.h @@ -317,6 +317,7 @@ extern vm_offset_t dmap_max_addr; * Need a page dump array for minidump. */ #define MINIDUMP_PAGE_TRACKING 1 +#define MINIDUMP_STARTUP_PAGE_TRACKING 1 #endif /* !_MACHINE_VMPARAM_H_ */ diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h index 0d9734ae3830..6f8885a539c3 100644 --- a/sys/i386/include/vmparam.h +++ b/sys/i386/include/vmparam.h @@ -246,5 +246,6 @@ * Need a page dump array for minidump. */ #define MINIDUMP_PAGE_TRACKING 1 +#define MINIDUMP_STARTUP_PAGE_TRACKING 0 #endif /* _MACHINE_VMPARAM_H_ */ diff --git a/sys/powerpc/include/vmparam.h b/sys/powerpc/include/vmparam.h index 250da8298610..a612acbef239 100644 --- a/sys/powerpc/include/vmparam.h +++ b/sys/powerpc/include/vmparam.h @@ -308,11 +308,13 @@ externint vm_level_0_order; * Need a page dump array for minidump. */ #define MINIDUMP_PAGE_TRACKING 1 +#define MINIDUMP_STARTUP_PAGE_TRACKING 1 #else /* * No minidump with 32-bit powerpc. */ #define MINIDUMP_PAGE_TRACKING 0 +#define MINIDUMP_STARTUP_PAGE_TRACKING 0 #endif #definePMAP_HAS_DMAP (hw_direct_map) diff --git a/sys/riscv/include/vmparam.h b/sys/riscv/include/vmparam.h index 5711bc8c347e..5092977d0669 100644 --- a/sys/riscv/include/vmparam.h +++ b/sys/riscv/include/vmparam.h @@ -257,5 +257,6 @@ extern vm_offset_t init_pt_va; * Need a page dump array for minidump. 
*/ #define MINIDUMP_PAGE_TRACKING 1 +#define MINIDUMP_STARTUP_PAGE_TRACKING 1 #endif /* !_MACHINE_VMPARAM_H_ */ diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index f9b6e18899c6..59066eb96ae9 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -1890,8 +1890,7 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, pa = VM_PAGE_TO_PHYS(m); for (i = 0; i < pages; i++, pa += PAGE_SIZE) { -#if defined(__aarch64__) || defined(__amd64__) || \ -defined(__riscv) || defined(__powerpc64__) +#if MINIDUMP_PAGE_TRACKING && MINIDUMP_STARTUP_PAGE_TRACKING if ((wait & M_NODUMP) == 0) dump_add_page(pa); #endif @@ -1918,8 +1917,7 @@ startup_free(void *mem, vm_size_t bytes) if (va >= bootstart && va + bytes <= bootmem) pmap_remove(kernel_pmap, va, va + bytes); for (; bytes != 0; bytes -= PAGE_SIZE, m++) { -#if defined(__aarch64__) || defined(__amd64__) || \ -defined(__riscv) || defined(__powerpc64__) +#if MINIDUMP_PAGE_TRACKING && MINIDUMP_STARTUP_PAGE_TRACKING dump_drop_page(VM_PAGE_TO_PHYS(m)); #endif vm_page_unwire_noq(m); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 4429ec067673..9ba31cb9e1b3 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -620,11 +620,7 @@ vm_page_startup(vm_offset_t vaddr) vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
git: d25ed6504383 - main - uma: Fix improper uses of UMA_MD_SMALL_ALLOC
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=d25ed6504383e15b7eb6d04876b70548fffc9690 commit d25ed6504383e15b7eb6d04876b70548fffc9690 Author: Bojan Novković AuthorDate: 2024-05-26 07:54:45 + Commit: Bojan Novković CommitDate: 2024-05-26 05:27:37 + uma: Fix improper uses of UMA_MD_SMALL_ALLOC UMA_MD_SMALL_ALLOC was recently replaced by UMA_USE_DMAP, but da76d349b6b1 missed some improper uses of the old symbol. This change makes sure that UMA_USE_DMAP is used properly in code that selects uma_small_alloc. Fixes: da76d349b6b1 Reported by: eduardo, rlibby Approved by: markj (mentor) Differential Revision: https://reviews.freebsd.org/D45368 --- sys/vm/uma_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 59066eb96ae9..516ac2c2965a 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -2523,7 +2523,7 @@ keg_ctor(void *mem, int size, void *udata, int flags) * If we haven't booted yet we need allocations to go through the * startup cache until the vm is ready. */ -#ifdef UMA_MD_SMALL_ALLOC +#ifdef UMA_USE_DMAP if (keg->uk_ppera == 1) keg->uk_allocf = uma_small_alloc; else @@ -2536,7 +2536,7 @@ keg_ctor(void *mem, int size, void *udata, int flags) keg->uk_allocf = contig_alloc; else keg->uk_allocf = page_alloc; -#ifdef UMA_MD_SMALL_ALLOC +#ifdef UMA_USE_DMAP if (keg->uk_ppera == 1) keg->uk_freef = uma_small_free; else @@ -5221,7 +5221,7 @@ uma_zone_reserve_kva(uma_zone_t zone, int count) keg->uk_kva = kva; keg->uk_offset = 0; zone->uz_max_items = pages * keg->uk_ipers; -#ifdef UMA_MD_SMALL_ALLOC +#ifdef UMA_USE_DMAP keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc; #else keg->uk_allocf = noobj_alloc;
git: 4c053c17f2c8 - main - zfs: Update use of UMA-related symbols in arc_available_memory
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=4c053c17f2c8a715988f215d16284879857ca376 commit 4c053c17f2c8a715988f215d16284879857ca376 Author: Bojan Novković AuthorDate: 2024-05-27 13:28:03 + Commit: Bojan Novković CommitDate: 2024-05-27 13:47:17 + zfs: Update use of UMA-related symbols in arc_available_memory da76d34 repurposed the use of UMA_MD_SMALL_ALLOC in a way that breaks arc_available_memory on -CURRENT. This change ensures that arc_available_memory uses the new symbol while maintaining compatibility with older FreeBSD releases. This change was submitted to upstream as well. Approved by:markj (mentor) Fixes: da76d34 --- sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c index 92696c0bf1ae..478b74828c65 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c @@ -89,7 +89,7 @@ arc_available_memory(void) if (n < lowest) { lowest = n; } -#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) +#if !defined(UMA_MD_SMALL_ALLOC) && !defined(UMA_USE_DMAP) /* * If we're on an i386 platform, it's possible that we'll exhaust the * kernel heap space before we ever run out of available physical
git: b53b21e8f81a - main - amd64 pmap: Release PTP reference on leaf ptpage allocation failure
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=b53b21e8f81a8d2d233b99cee6426c2f64765a3c commit b53b21e8f81a8d2d233b99cee6426c2f64765a3c Author: Bojan Novković AuthorDate: 2024-06-13 15:58:49 + Commit: Bojan Novković CommitDate: 2024-06-16 16:19:26 + amd64 pmap: Release PTP reference on leaf ptpage allocation failure aa3bcaa fixed an edge case involving mlock() and superpage creation by creating and inserting a leaf pagetable page for mlock'd superpages. However, the code does not properly release the reference to the pagetable page in the error handling path. This commit fixes the issue by adding calls to 'pmap_abort_ptp' in the error handling path. Reported by: alc Approved by: markj (mentor) Fixes: aa3bcaa Differential Revision: https://reviews.freebsd.org/D45577 --- sys/amd64/amd64/pmap.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 4d4ecc8ea4e2..dee208fc9145 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -7595,10 +7595,13 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, if ((newpde & PG_W) != 0 && pmap != kernel_pmap) { uwptpg = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va), VM_ALLOC_WIRED); - if (uwptpg == NULL) + if (uwptpg == NULL) { + pmap_abort_ptp(pmap, va, pdpg); return (KERN_RESOURCE_SHORTAGE); + } if (pmap_insert_pt_page(pmap, uwptpg, true, false)) { pmap_free_pt_page(pmap, uwptpg, false); + pmap_abort_ptp(pmap, va, pdpg); return (KERN_RESOURCE_SHORTAGE); }
git: 5d4545a2270e - main - arm64 pmap: Release PTP reference on leaf ptpage allocation failure
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=5d4545a2270e9d6c37c0a580c010c579ccdfa129 commit 5d4545a2270e9d6c37c0a580c010c579ccdfa129 Author: Bojan Novković AuthorDate: 2024-06-13 15:59:29 + Commit: Bojan Novković CommitDate: 2024-06-16 16:19:26 + arm64 pmap: Release PTP reference on leaf ptpage allocation failure 808f5ac fixed an edge case involving mlock() and superpage creation by creating and inserting a leaf pagetable page for mlock'd superpages. However, the code does not properly release the reference to the pagetable page in the error handling path. This commit fixes the issue by adding calls to 'pmap_abort_ptp' in the error handling path. Reported by: alc Approved by: markj (mentor) Fixes: 808f5ac Differential Revision: https://reviews.freebsd.org/D45578 --- sys/arm64/arm64/pmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 7b30b2a6ae37..e8991ae706f9 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -5545,12 +5545,14 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, if ((new_l2 & ATTR_SW_WIRED) != 0 && pmap != kernel_pmap) { uwptpg = vm_page_alloc_noobj(VM_ALLOC_WIRED); if (uwptpg == NULL) { + pmap_abort_ptp(pmap, va, l2pg); return (KERN_RESOURCE_SHORTAGE); } uwptpg->pindex = pmap_l2_pindex(va); if (pmap_insert_pt_page(pmap, uwptpg, true, false)) { vm_page_unwire_noq(uwptpg); vm_page_free(uwptpg); + pmap_abort_ptp(pmap, va, l2pg); return (KERN_RESOURCE_SHORTAGE); } pmap_resident_count_inc(pmap, 1);
git: 200de4dc0716 - main - powerpc_mmu_radix: Introduce 'pmap_abort_ptp'
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=200de4dc0716ae2cc2b0995ccb6eee555d14c751 commit 200de4dc0716ae2cc2b0995ccb6eee555d14c751 Author: Bojan Novković AuthorDate: 2024-06-13 16:03:31 + Commit: Bojan Novković CommitDate: 2024-06-16 16:19:26 + powerpc_mmu_radix: Introduce 'pmap_abort_ptp' This commit moves code for releasing pagetable page references into a separate function. No functional change intended. Approved by: markj (mentor) Differential Revision: https://reviews.freebsd.org/D45581 --- sys/powerpc/aim/mmu_radix.c | 34 +++--- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/sys/powerpc/aim/mmu_radix.c b/sys/powerpc/aim/mmu_radix.c index 0a534e3f9d3f..746b1ef49a99 100644 --- a/sys/powerpc/aim/mmu_radix.c +++ b/sys/powerpc/aim/mmu_radix.c @@ -3137,6 +3137,28 @@ out: return (rv); } +/* + * Release a page table page reference after a failed attempt to create a + * mapping. + */ +static void +pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t pdpg) +{ + struct spglist free; + + SLIST_INIT(&free); + if (pmap_unwire_ptp(pmap, va, pdpg, &free)) { + /* +* Although "va" is not mapped, paging- +* structure caches could nonetheless have +* entries that refer to the freed page table +* pages. Invalidate those entries. +*/ + pmap_invalidate_page(pmap, va); + vm_page_free_pages_toq(&free, true); + } +} + /* * Tries to create a read- and/or execute-only 2MB page mapping. Returns true * if successful. Returns false if (1) a page table page cannot be allocated @@ -3264,17 +3286,7 @@ pmap_enter_l3e(pmap_t pmap, vm_offset_t va, pml3_entry_t newpde, u_int flags, * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_l3e(pmap, va, newpde, flags, lockp)) { - SLIST_INIT(&free); - if (pmap_unwire_ptp(pmap, va, pdpg, &free)) { - /* -* Although "va" is not mapped, paging- -* structure caches could nonetheless have -* entries that refer to the freed page table -* pages. 
Invalidate those entries. -*/ - pmap_invalidate_page(pmap, va); - vm_page_free_pages_toq(&free, true); - } + pmap_abort_ptp(pmap, va, pdpg); if (uwptpg != NULL) { mt = pmap_remove_pt_page(pmap, va); KASSERT(mt == uwptpg,
git: 858ead4bcefb - main - powerpc_mmu_radix: Release PTP reference on leaf ptpage allocation failure
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=858ead4bcefb4657629cba29b0e4507db509ee36 commit 858ead4bcefb4657629cba29b0e4507db509ee36 Author: Bojan Novković AuthorDate: 2024-06-13 16:11:12 + Commit: Bojan Novković CommitDate: 2024-06-16 16:19:27 + powerpc_mmu_radix: Release PTP reference on leaf ptpage allocation failure 0013741 fixed an edge case involving mlock() and superpage creation by creating and inserting a leaf pagetable page for mlock'd superpages. However, the code does not properly release the reference to the pagetable page in the error handling path. This commit fixes the issue by adding calls to 'pmap_abort_ptp' in the error handling path. Reported by: alc Approved by: markj (mentor) Fixes: 0013741 Differential Revision: https://reviews.freebsd.org/D45582 --- sys/powerpc/aim/mmu_radix.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sys/powerpc/aim/mmu_radix.c b/sys/powerpc/aim/mmu_radix.c index 746b1ef49a99..ae6e4d116e87 100644 --- a/sys/powerpc/aim/mmu_radix.c +++ b/sys/powerpc/aim/mmu_radix.c @@ -3268,12 +3268,15 @@ pmap_enter_l3e(pmap_t pmap, vm_offset_t va, pml3_entry_t newpde, u_int flags, uwptpg = NULL; if ((newpde & PG_W) != 0 && pmap != kernel_pmap) { uwptpg = vm_page_alloc_noobj(VM_ALLOC_WIRED); - if (uwptpg == NULL) + if (uwptpg == NULL) { + pmap_abort_ptp(pmap, va, pdpg); return (KERN_RESOURCE_SHORTAGE); + } uwptpg->pindex = pmap_l3e_pindex(va); if (pmap_insert_pt_page(pmap, uwptpg)) { vm_page_unwire_noq(uwptpg); vm_page_free(uwptpg); + pmap_abort_ptp(pmap, va, pdpg); return (KERN_RESOURCE_SHORTAGE); } pmap_resident_count_inc(pmap, 1);
git: e8816b4b66ad - main - riscv pmap: Introduce 'pmap_abort_ptp'
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=e8816b4b66adf2e6052803cd0eb609ee63fbb3ed commit e8816b4b66adf2e6052803cd0eb609ee63fbb3ed Author: Bojan Novković AuthorDate: 2024-06-13 16:13:53 + Commit: Bojan Novković CommitDate: 2024-06-16 16:19:27 + riscv pmap: Introduce 'pmap_abort_ptp' This commit moves code for releasing pagetable page references into a separate function. No functional change intended. Approved by: markj (mentor) Differential Revision: https://reviews.freebsd.org/D45579 --- sys/riscv/riscv/pmap.c | 34 +++--- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 1902f1f4009b..4f6305ed651d 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -3137,6 +3137,28 @@ out: return (rv); } +/* + * Release a page table page reference after a failed attempt to create a + * mapping. + */ +static void +pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t l2pg) +{ + struct spglist free; + + SLIST_INIT(&free); + if (pmap_unwire_ptp(pmap, va, l2pg, &free)) { + /* +* Although "va" is not mapped, paging-structure +* caches could nonetheless have entries that +* refer to the freed page table pages. +* Invalidate those entries. +*/ + pmap_invalidate_page(pmap, va); + vm_page_free_pages_toq(&free, true); + } +} + /* * Tries to create a read- and/or execute-only 2MB page mapping. Returns * KERN_SUCCESS if the mapping was created. Otherwise, returns an error @@ -3285,17 +3307,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) { - SLIST_INIT(&free); - if (pmap_unwire_ptp(pmap, va, l2pg, &free)) { - /* -* Although "va" is not mapped, paging-structure -* caches could nonetheless have entries that -* refer to the freed page table pages. -* Invalidate those entries. 
-*/ - pmap_invalidate_page(pmap, va); - vm_page_free_pages_toq(&free, true); - } + pmap_abort_ptp(pmap, va, l2pg); if (uwptpg != NULL) { mt = pmap_remove_pt_page(pmap, va); KASSERT(mt == uwptpg,
git: 774549fe06ac - main - riscv pmap: Release PTP reference on leaf ptpage allocation failure
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=774549fe06ac0f45a5a5a661a7fb4107a8695d4c commit 774549fe06ac0f45a5a5a661a7fb4107a8695d4c Author: Bojan Novković AuthorDate: 2024-06-13 16:14:21 + Commit: Bojan Novković CommitDate: 2024-06-16 16:19:27 + riscv pmap: Release PTP reference on leaf ptpage allocation failure d0941ed fixed an edge case involving mlock() and superpage creation by creating and inserting a leaf pagetable page for mlock'd superpages. However, the code does not properly release the reference to the pagetable page in the error handling path. This commit fixes the issue by adding calls to 'pmap_abort_ptp' in the error handling path. Reported by: alc Approved by: markj (mentor) Fixes: d0941ed Differential Revision: https://reviews.freebsd.org/D45580 --- sys/riscv/riscv/pmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 4f6305ed651d..1e4061935ca0 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -3291,12 +3291,14 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, if ((new_l2 & PTE_SW_WIRED) != 0 && pmap != kernel_pmap) { uwptpg = vm_page_alloc_noobj(VM_ALLOC_WIRED); if (uwptpg == NULL) { + pmap_abort_ptp(pmap, va, l2pg); return (KERN_RESOURCE_SHORTAGE); } uwptpg->pindex = pmap_l2_pindex(va); if (pmap_insert_pt_page(pmap, uwptpg, true, false)) { vm_page_unwire_noq(uwptpg); vm_page_free(uwptpg); + pmap_abort_ptp(pmap, va, l2pg); return (KERN_RESOURCE_SHORTAGE); } pmap_resident_count_inc(pmap, 1);
git: 7a79d0669761 - main - vm: improve kstack_object pindex calculation to avoid pindex holes
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=7a79d066976149349ecb90240d02eed0c4268737 commit 7a79d066976149349ecb90240d02eed0c4268737 Author: Bojan Novković AuthorDate: 2024-04-09 19:02:12 + Commit: Bojan Novković CommitDate: 2024-04-10 15:37:20 + vm: improve kstack_object pindex calculation to avoid pindex holes This commit replaces the linear transformation of kernel virtual addresses to kstack_object pindex values with a non-linear scheme that circumvents physical memory fragmentation caused by kernel stack guard pages. The new mapping scheme is used to effectively "skip" guard pages and assign pindices for non-guard pages in a contiguous fashion. The new allocation scheme requires that all default-sized kstack KVAs come from a separate, specially aligned region of the KVA space. For this to work, this commit introduces a dedicated per-domain kstack KVA arena used to allocate kernel stacks of default size. The behaviour on 32-bit platforms remains unchanged due to a significantly smaller KVA space. Aside from fulfilling the requirements imposed by the new scheme, a separate kstack KVA arena facilitates superpage promotion in the rest of the kernel and causes most kstacks to have guard pages at both ends. Reviewed by: alc, kib, markj Tested by: markj Approved by: markj (mentor) Differential Revision: https://reviews.freebsd.org/D38852 --- sys/sys/proc.h | 3 +- sys/vm/vm_extern.h | 6 +- sys/vm/vm_glue.c| 336 +--- sys/vm/vm_kern.h| 2 - sys/vm/vm_swapout.c | 29 +++-- 5 files changed, 314 insertions(+), 62 deletions(-) diff --git a/sys/sys/proc.h b/sys/sys/proc.h index b08226c89dfd..fa4c7d2768f0 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -366,7 +366,8 @@ struct thread { struct callout td_slpcallout; /* (h) Callout for sleep. */ struct trapframe *td_frame; /* (k) */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ - int td_kstack_pages; /* (a) Size of the kstack.
*/ + u_short td_kstack_pages;/* (a) Size of the kstack. */ + u_short td_kstack_domain; /* (a) Domain backing kstack KVA. */ volatile u_int td_critnest;/* (k*) Critical section nest level. */ struct mdthread td_md; /* (k) Any machine-dependent fields. */ struct kaudit_record*td_ar; /* (k) Active audit record, if any. */ diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 2e2bc18a0233..b50abab7380c 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -127,8 +127,10 @@ struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset); void vm_imgact_unmap_page(struct sf_buf *sf); void vm_thread_dispose(struct thread *td); int vm_thread_new(struct thread *td, int pages); -void vm_thread_stack_back(struct domainset *ds, vm_offset_t kaddr, -vm_page_t ma[], int npages, int req_class); +vm_pindex_t vm_kstack_pindex(vm_offset_t ks, int npages); +vm_object_t vm_thread_kstack_size_to_obj(int npages); +int vm_thread_stack_back(vm_offset_t kaddr, vm_page_t ma[], int npages, +int req_class, int domain); u_int vm_active_count(void); u_int vm_inactive_count(void); u_int vm_laundry_count(void); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 100d31e7c4ec..4292a7533503 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -96,14 +96,23 @@ #include #include #include +#include #include #include #include #include #include +#include #include +#if VM_NRESERVLEVEL > 0 +#define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT) +#else +#define KVA_KSTACK_QUANTUM_SHIFT (8 + PAGE_SHIFT) +#endif +#define KVA_KSTACK_QUANTUM (1ul << KVA_KSTACK_QUANTUM_SHIFT) + /* * MPSAFE * @@ -262,9 +271,11 @@ vm_sync_icache(vm_map_t map, vm_offset_t va, vm_offset_t sz) pmap_sync_icache(map->pmap, va, sz); } -vm_object_t kstack_object; +static vm_object_t kstack_object; +static vm_object_t kstack_alt_object; static uma_zone_t kstack_cache; static int kstack_cache_size; +static vmem_t *vmd_kstack_arena[MAXMEMDOM]; static int sysctl_kstack_cache_size(SYSCTL_HANDLER_ARGS) @@ 
-282,63 +293,218 @@ SYSCTL_PROC(_vm, OID_AUTO, kstack_cache_size, sysctl_kstack_cache_size, "IU", "Maximum number of cached kernel stacks"); /* - * Create the kernel stack (including pcb for i386) for a new thread. + * Allocate a virtual address range from a domain kstack arena, following + * the specified NUMA policy. */ static vm_offset_t -vm_thread_stack_create(struct domainset *ds, int pages) +vm_thread_alloc_kstack_kva(vm_size_t size, int domain) { - vm_page_t ma[KSTACK_MAX_PAGES]; - vm_offset_t ks; - int i; +#ifndef __ILP32__ + int rv; + v
git: 849599e28a87 - main - committers-src: Add bnovkov@ with markj@ and jhb@ as mentors
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=849599e28a8708b137be2c8ed2c7ca114d0caf1e commit 849599e28a8708b137be2c8ed2c7ca114d0caf1e Author: Bojan Novković AuthorDate: 2024-02-19 15:55:31 + Commit: Bojan Novković CommitDate: 2024-02-19 15:55:31 + committers-src: Add bnovkov@ with markj@ and jhb@ as mentors Add mentorship information for bnovkov@. Approved by: markj (mentor) Differential Revision: https://reviews.freebsd.org/D43963 --- share/misc/committers-src.dot | 3 +++ 1 file changed, 3 insertions(+) diff --git a/share/misc/committers-src.dot b/share/misc/committers-src.dot index 6201c0c69894..d9fd06cd6597 100644 --- a/share/misc/committers-src.dot +++ b/share/misc/committers-src.dot @@ -131,6 +131,7 @@ bdrewery [label="Bryan Drewery\nbdrew...@freebsd.org\n2013/12/14"] benl [label="Ben Laurie\nb...@freebsd.org\n2011/05/18"] benno [label="Benno Rice\nbe...@freebsd.org\n2000/11/02"] bms [label="Bruce M Simpson\n...@freebsd.org\n2003/08/06"] +bnovkov [label="Bojan Novkovic\nbnov...@freebsd.org\n2024/02/13"] br [label="Ruslan Bukin\n...@freebsd.org\n2013/09/02"] brian [label="Brian Somers\nbr...@freebsd.org\n1996/12/16"] brooks [label="Brooks Davis\nbro...@freebsd.org\n2001/06/21"] @@ -636,6 +637,7 @@ jfv -> erj jhb -> arr jhb -> avg +jhb -> bnovkov jhb -> jch jhb -> jeff jhb -> kbyanc @@ -750,6 +752,7 @@ marcel -> marius marcel -> nwhitehorn marcel -> sjg +markj -> bnovkov markj -> cem markj -> christos markj -> dougm
git: c21bc6f3c242 - main - ddb: Add CTF-based pretty printing
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=c21bc6f3c2425de74141bfee07b609bf65b5a6b3 commit c21bc6f3c2425de74141bfee07b609bf65b5a6b3 Author: Bojan Novković AuthorDate: 2024-03-22 03:01:34 + Commit: Bojan Novković CommitDate: 2024-03-22 03:03:33 + ddb: Add CTF-based pretty printing Add basic CTF support and a CTF-powered pretty-printer to ddb. The db_ctf.* files expose a basic interface for fetching type data for ELF symbols, interacting with the CTF string table, and translating type identifiers to type data. The db_pprint.c file uses those interfaces to implement a pretty-printer for all kernel ELF symbols. The pretty-printer works with symbol names and arbitrary addresses: pprint struct thread 0x8194ad90 Pretty-printing currently only works after the root filesystem gets mounted because the CTF info is not available during early boot. Differential Revision: https://reviews.freebsd.org/D37899 Approved by: markj (mentor) --- share/man/man4/ddb.4| 26 +++ sys/conf/files | 2 + sys/ddb/db_command.c| 1 + sys/ddb/db_ctf.c| 326 +++ sys/ddb/db_ctf.h| 64 +++ sys/ddb/db_pprint.c | 450 sys/ddb/ddb.h | 1 + sys/kern/kern_ctf.c | 40 + sys/kern/kern_linker.c | 68 +++- sys/kern/link_elf.c | 37 sys/kern/link_elf_obj.c | 14 ++ sys/kern/linker_if.m| 23 +++ sys/sys/linker.h| 3 + 13 files changed, 1054 insertions(+), 1 deletion(-) diff --git a/share/man/man4/ddb.4 b/share/man/man4/ddb.4 index 3648c9ca58cb..f3443cbac127 100644 --- a/share/man/man4/ddb.4 +++ b/share/man/man4/ddb.4 @@ -289,6 +289,32 @@ eax = xx ecx = yy .Ed .Pp +.It Ic pprint Ns Oo Li / Ns Cm d depth Oc Oo Ar name Oc +Pretty-print symbol specified by +.Ar name +using CTF debugging data. Works for all symbols exported by the kernel and loaded kernel modules. +.Pp +If the +.Cm d +modifier has been specified, contents of structs nested up to +.Ar depth +levels deep will also be included in the output. 
+.Ed +.Pp +.It Ic pprint struct Ns Oo Li / Ns Cm d depth Ic Oc Oo Ar name Oc Ns Op Ns Ar addr +Print memory at +.Ar addr +as struct +.Ar name Ns . +Works for all structs defined by the kernel and loaded kernel modules. +.Pp +If the +.Cm d +modifier has been specified, contents of structs nested up to +.Ar depth +levels deep will also be included in the output. +.Ed +.Pp .It Xo .Ic write Ns Op Li / Ns Cm bhl .Ar addr expr1 Op Ar expr2 ... diff --git a/sys/conf/files b/sys/conf/files index c902bcfdbd52..021829408c0f 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -718,12 +718,14 @@ ddb/db_access.c optional ddb ddb/db_break.c optional ddb ddb/db_capture.c optional ddb ddb/db_command.c optional ddb +ddb/db_ctf.c optional ddb ddb/db_examine.c optional ddb ddb/db_expr.c optional ddb ddb/db_input.c optional ddb ddb/db_lex.c optional ddb ddb/db_main.c optional ddb ddb/db_output.coptional ddb +ddb/db_pprint.coptional ddb ddb/db_print.c optional ddb ddb/db_ps.coptional ddb ddb/db_run.c optional ddb diff --git a/sys/ddb/db_command.c b/sys/ddb/db_command.c index 9d79e3b2a6d3..0c88d496f6b8 100644 --- a/sys/ddb/db_command.c +++ b/sys/ddb/db_command.c @@ -163,6 +163,7 @@ static struct db_command db_cmds[] = { DB_CMD("capture", db_capture_cmd, CS_OWN|DB_CMD_MEMSAFE), DB_CMD("textdump", db_textdump_cmd,CS_OWN|DB_CMD_MEMSAFE), DB_CMD("findstack", db_findstack_cmd, 0), + DB_CMD("pprint",db_pprint_cmd, CS_OWN), }; struct db_command_table db_cmd_table = LIST_HEAD_INITIALIZER(db_cmd_table); diff --git a/sys/ddb/db_ctf.c b/sys/ddb/db_ctf.c new file mode 100644 index ..03145064885c --- /dev/null +++ b/sys/ddb/db_ctf.c @@ -0,0 +1,326 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2023 Bojan Novković + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS
git: 637e67e03290 - main - ddb: Drop obsolete -FreeBSD identifier from license
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=637e67e0329058c86353dbe523740e34d8fefd11 commit 637e67e0329058c86353dbe523740e34d8fefd11 Author: Bojan Novković AuthorDate: 2024-03-28 19:32:52 + Commit: Bojan Novković CommitDate: 2024-03-28 19:32:52 + ddb: Drop obsolete -FreeBSD identifier from license Reported by:jrtc27 Fixes: c21bc6f3c242 ("ddb: Add CTF-based pretty printing") Approved by:markj (mentor) --- sys/ddb/db_ctf.c| 2 +- sys/ddb/db_ctf.h| 2 +- sys/ddb/db_pprint.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/ddb/db_ctf.c b/sys/ddb/db_ctf.c index 03145064885c..56a6086849e9 100644 --- a/sys/ddb/db_ctf.c +++ b/sys/ddb/db_ctf.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2023 Bojan Novković * diff --git a/sys/ddb/db_ctf.h b/sys/ddb/db_ctf.h index 6da5f76b6cf6..c4c977cb8205 100644 --- a/sys/ddb/db_ctf.h +++ b/sys/ddb/db_ctf.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2023 Bojan Novković * diff --git a/sys/ddb/db_pprint.c b/sys/ddb/db_pprint.c index aae3d698e8ec..8aa14550f068 100644 --- a/sys/ddb/db_pprint.c +++ b/sys/ddb/db_pprint.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Bojan Novković *
git: 722b8e3cb62b - main - Fix style nits in kern_linker.c
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=722b8e3cb62bd3e43035527e08fe058d5046901d commit 722b8e3cb62bd3e43035527e08fe058d5046901d Author: Bojan Novković AuthorDate: 2024-03-28 19:36:30 + Commit: Bojan Novković CommitDate: 2024-03-28 19:36:30 + Fix style nits in kern_linker.c Reported by:jrtc27 Fixes: c21bc6f3c242 ("ddb: Add CTF-based pretty printing") Approved by:markj (mentor) --- sys/kern/link_elf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c index dddead849dc9..b08c19f3c018 100644 --- a/sys/kern/link_elf.c +++ b/sys/kern/link_elf.c @@ -147,7 +147,6 @@ static int link_elf_lookup_debug_symbol(linker_file_t, const char *, c_linker_sym_t *); static int link_elf_lookup_debug_symbol_ctf(linker_file_t lf, const char *name, c_linker_sym_t *sym, linker_ctf_t *lc); - static int link_elf_symbol_values(linker_file_t, c_linker_sym_t, linker_symval_t *); static int link_elf_debug_symbol_values(linker_file_t, c_linker_sym_t,
git: bdc903460be4 - main - kern_ctf.c: Don't print out warning messages unconditionally
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=bdc903460be4b6a729c1b7cde55963730c68cec4 commit bdc903460be4b6a729c1b7cde55963730c68cec4 Author: Bojan Novković AuthorDate: 2024-03-29 19:17:19 + Commit: Bojan Novković CommitDate: 2024-03-29 19:32:18 + kern_ctf.c: Don't print out warning messages unconditionally The kernel CTF loading routines print various warnings when attempting to load CTF data from an ELF file. After the changes in c21bc6f3c242 those warnings are unnecessarily printed for each kernel module that was compiled without CTF data. The kernel linker already uses the bootverbose flag to conditionally print CTF loading errors. This patch alters kern_ctf.c routines to do the same. Reported by:alexan...@leidinger.net Approved by:markj (mentor) Fixes: c21bc6f3c242 ("ddb: Add CTF-based pretty printing") --- sys/kern/kern_ctf.c | 35 +++ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/sys/kern/kern_ctf.c b/sys/kern/kern_ctf.c index b525c274f9e0..1087406ff82e 100644 --- a/sys/kern/kern_ctf.c +++ b/sys/kern/kern_ctf.c @@ -144,9 +144,12 @@ link_elf_ctf_get(linker_file_t lf, linker_ctf_t *lc) * .SUNW_ctf section containing the CTF data. */ if (hdr->e_shstrndx == 0 || shdr[hdr->e_shstrndx].sh_type != SHT_STRTAB) { - printf("%s(%d): module %s e_shstrndx is %d, sh_type is %d\n", - __func__, __LINE__, lf->pathname, hdr->e_shstrndx, - shdr[hdr->e_shstrndx].sh_type); + if (bootverbose) { + printf( + "%s(%d): module %s e_shstrndx is %d, sh_type is %d\n", + __func__, __LINE__, lf->pathname, hdr->e_shstrndx, + shdr[hdr->e_shstrndx].sh_type); + } error = EFTYPE; goto out; } @@ -167,8 +170,10 @@ link_elf_ctf_get(linker_file_t lf, linker_ctf_t *lc) /* Check if the CTF section wasn't found. 
*/ if (i >= hdr->e_shnum) { - printf("%s(%d): module %s has no .SUNW_ctf section\n", - __func__, __LINE__, lf->pathname); + if (bootverbose) { + printf("%s(%d): module %s has no .SUNW_ctf section\n", + __func__, __LINE__, lf->pathname); + } error = EFTYPE; goto out; } @@ -181,17 +186,21 @@ link_elf_ctf_get(linker_file_t lf, linker_ctf_t *lc) /* Check the CTF magic number. */ if (cth.cth_magic != CTF_MAGIC) { - printf("%s(%d): module %s has invalid format\n", - __func__, __LINE__, lf->pathname); + if (bootverbose) { + printf("%s(%d): module %s has invalid format\n", + __func__, __LINE__, lf->pathname); + } error = EFTYPE; goto out; } if (cth.cth_version != CTF_VERSION_2 && cth.cth_version != CTF_VERSION_3) { - printf( - "%s(%d): module %s CTF format has unsupported version %d\n", - __func__, __LINE__, lf->pathname, cth.cth_version); + if (bootverbose) { + printf( + "%s(%d): module %s CTF format has unsupported version %d\n", + __func__, __LINE__, lf->pathname, cth.cth_version); + } error = EFTYPE; goto out; } @@ -250,8 +259,10 @@ link_elf_ctf_get(linker_file_t lf, linker_ctf_t *lc) ret = uncompress(ctftab + sizeof(cth), &destlen, raw + sizeof(cth), shdr[i].sh_size - sizeof(cth)); if (ret != Z_OK) { - printf("%s(%d): zlib uncompress returned %d\n", - __func__, __LINE__, ret); + if (bootverbose) { + printf("%s(%d): zlib uncompress returned %d\n", + __func__, __LINE__, ret); + } error = EIO; goto out; }
git: aada453dcbaa - main - ddb: Properly pretty-print non-labeled enum values
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=aada453dcbaab1b8f7d50b66add5a38eb9e06cc3 commit aada453dcbaab1b8f7d50b66add5a38eb9e06cc3 Author: Bojan Novković AuthorDate: 2024-04-03 15:47:00 + Commit: Bojan Novković CommitDate: 2024-04-03 16:17:11 + ddb: Properly pretty-print non-labeled enum values The ddb pretty-printer currently does not print out enum values that are not labeled (e.g. X | Y). The enum printer was reworked to print non-labeled values. Reported by:jrtc27 Fixes: c21bc6f ("ddb: Add CTF-based pretty printing") Approved by:markj (mentor) --- sys/ddb/db_pprint.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sys/ddb/db_pprint.c b/sys/ddb/db_pprint.c index 8aa14550f068..b4116372cf65 100644 --- a/sys/ddb/db_pprint.c +++ b/sys/ddb/db_pprint.c @@ -225,13 +225,14 @@ db_pprint_enum(db_addr_t addr, struct ctf_type_v3 *type, u_int depth) for (; ep < endp; ep++) { if (val == ep->cte_value) { valname = db_ctf_stroff_to_str(&sym_data, ep->cte_name); - if (valname != NULL) - db_printf("%s (0x%lx)", valname, (long)val); - else - db_printf("(0x%lx)", (long)val); - break; + if (valname != NULL) { + db_printf("%s (0x%lx)", valname, val); + break; + } } } + if (ep == endp) + db_printf("0x%lx", val); } /*
git: 872c4402af13 - main - ddb: Don't throw away qualifier when pretty-printing unnamed pointers
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=872c4402af1325ed156d7f5ee2252dd36b016b5c commit 872c4402af1325ed156d7f5ee2252dd36b016b5c Author: Bojan Novković AuthorDate: 2024-04-03 15:55:13 + Commit: Bojan Novković CommitDate: 2024-04-03 16:17:54 + ddb: Don't throw away qualifier when pretty-printing unnamed pointers Reported by:jrtc27 Fixes: c21bc6f ("ddb: Add CTF-based pretty printing") Approved by:markj (mentor) --- sys/ddb/db_pprint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/ddb/db_pprint.c b/sys/ddb/db_pprint.c index b4116372cf65..8e6759a03220 100644 --- a/sys/ddb/db_pprint.c +++ b/sys/ddb/db_pprint.c @@ -276,7 +276,7 @@ db_pprint_ptr(db_addr_t addr, struct ctf_type_v3 *type, u_int depth) if (name != NULL) db_printf("(%s%s *) 0x%lx", qual, name, (long)val); else - db_printf("0x%lx", (long)val); + db_printf("(%s *) 0x%lx", qual, (long)val); } }
git: a02f9685edd1 - main - vm_meter: Add counter for NOFREE pages
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=a02f9685edd168ef51e2e6fd98f09c9b866fa9a9 commit a02f9685edd168ef51e2e6fd98f09c9b866fa9a9 Author: Bojan Novković AuthorDate: 2024-10-07 14:56:08 + Commit: Bojan Novković CommitDate: 2024-10-07 16:46:32 + vm_meter: Add counter for NOFREE pages This change adds a new counter that tracks the total number of permanently allocated pages. Differential Revision: https://reviews.freebsd.org/D46978 Reviewed by:alc, markj --- sys/sys/vmmeter.h | 8 sys/vm/vm_meter.c | 2 ++ sys/vm/vm_page.c | 1 + 3 files changed, 11 insertions(+) diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h index 36321be22205..ac4d2f7e7c90 100644 --- a/sys/sys/vmmeter.h +++ b/sys/sys/vmmeter.h @@ -120,6 +120,7 @@ struct vmmeter { counter_u64_t v_rforkpages; /* (p) pages affected by rfork() */ counter_u64_t v_kthreadpages; /* (p) ... and by kernel fork() */ counter_u64_t v_wire_count; /* (p) pages wired down */ + counter_u64_t v_nofree_count; /* (p) permanently allocated pages */ #defineVM_METER_NCOUNTERS \ (offsetof(struct vmmeter, v_page_size) / sizeof(counter_u64_t)) /* @@ -174,6 +175,13 @@ vm_wire_count(void) return (VM_CNT_FETCH(v_wire_count)); } +static inline u_int +vm_nofree_count(void) +{ + + return (VM_CNT_FETCH(v_nofree_count)); +} + /* * Return TRUE if we are under our severe low-free-pages threshold * diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 7348577fc3cb..faf4074ef0c6 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -90,6 +90,7 @@ struct vmmeter __read_mostly vm_cnt = { .v_rforkpages = EARLY_COUNTER, .v_kthreadpages = EARLY_COUNTER, .v_wire_count = EARLY_COUNTER, + .v_nofree_count = EARLY_COUNTER, }; u_long __exclusive_cache_line vm_user_wire_count; @@ -386,6 +387,7 @@ VM_STATS_UINT(v_free_target, "Pages desired free"); VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold"); VM_STATS_PROC(v_free_count, "Free pages", vm_free_count); VM_STATS_PROC(v_wire_count, "Wired pages", 
vm_wire_count); +VM_STATS_PROC(v_nofree_count, "Permanently allocated pages", vm_nofree_count); VM_STATS_PROC(v_active_count, "Active pages", vm_active_count); VM_STATS_UINT(v_inactive_target, "Desired inactive pages"); VM_STATS_PROC(v_inactive_count, "Inactive pages", vm_inactive_count); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 6256472e0336..67a9c2119ab8 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -2594,6 +2594,7 @@ vm_page_alloc_nofree_domain(int domain, int req) } m = &nqp->ma[nqp->offs++]; vm_domain_free_unlock(vmd); + VM_CNT_ADD(v_nofree_count, 1); return (m); }
git: 596a36ddc478 - main - vmstat: Add NOFREE page count to -s report
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=596a36ddc47889e1b3bde00566cef2d8695c2847 commit 596a36ddc47889e1b3bde00566cef2d8695c2847 Author: Bojan Novković AuthorDate: 2024-10-07 15:02:42 + Commit: Bojan Novković CommitDate: 2024-10-07 16:46:32 + vmstat: Add NOFREE page count to -s report This change adds the number of permanently allocated pages to the 'sum' structure report. Differential Revision: https://reviews.freebsd.org/D46980 Reviewed by:markj --- usr.bin/vmstat/vmstat.c | 4 1 file changed, 4 insertions(+) diff --git a/usr.bin/vmstat/vmstat.c b/usr.bin/vmstat/vmstat.c index 8c7790403786..8d6c40a4d2fd 100644 --- a/usr.bin/vmstat/vmstat.c +++ b/usr.bin/vmstat/vmstat.c @@ -142,6 +142,7 @@ static struct __vmmeter { u_int v_free_count; u_int v_wire_count; u_long v_user_wire_count; + u_int v_nofree_count; u_int v_active_count; u_int v_inactive_target; u_int v_inactive_count; @@ -558,6 +559,7 @@ fill_vmmeter(struct __vmmeter *vmmp) GET_VM_STATS(vm, v_free_count); GET_VM_STATS(vm, v_wire_count); GET_VM_STATS(vm, v_user_wire_count); + GET_VM_STATS(vm, v_nofree_count); GET_VM_STATS(vm, v_active_count); GET_VM_STATS(vm, v_inactive_target); GET_VM_STATS(vm, v_inactive_count); @@ -1004,6 +1006,8 @@ dosum(void) sum.v_wire_count); xo_emit("{:virtual-user-wired-pages/%9lu} {N:virtual user pages wired " "down}\n", sum.v_user_wire_count); + xo_emit("{:nofree-pages/%9u} {N:permanently allocated pages}\n", + sum.v_nofree_count); xo_emit("{:free-pages/%9u} {N:pages free}\n", sum.v_free_count); xo_emit("{:bytes-per-page/%9u} {N:bytes per page}\n", sum.v_page_size);
git: 149e1af6ae49 - main - vm_kern: Use VM_ALLOC_NOFREE when allocating 'zero_region' page
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=149e1af6ae4936fac0a907d4c62d745c179b4dc5 commit 149e1af6ae4936fac0a907d4c62d745c179b4dc5 Author: Bojan Novković AuthorDate: 2024-10-05 15:05:40 + Commit: Bojan Novković CommitDate: 2024-10-05 15:05:40 + vm_kern: Use VM_ALLOC_NOFREE when allocating 'zero_region' page Allocate the 'zero_region' page using VM_ALLOC_NOFREE since it never gets released. Differential Revision: https://reviews.freebsd.org/D46885 Reviewed by:alc, markj, kib --- sys/vm/vm_kern.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index fb7c80b767ed..22776e2196b0 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -762,7 +762,8 @@ kmem_init_zero_region(void) * zeros, while not using much more physical resources. */ addr = kva_alloc(ZERO_REGION_SIZE); - m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO); + m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO | + VM_ALLOC_NOFREE); for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE) pmap_qenter(addr + i, &m, 1); pmap_protect(kernel_pmap, addr, addr + ZERO_REGION_SIZE, VM_PROT_READ);
git: 29a6f8fd93c2 - main - vm: Use VM_ALLOC_NOFREE when allocating bogus_page
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=29a6f8fd93c278f0c7d7b2248068bb0353027e4a commit 29a6f8fd93c278f0c7d7b2248068bb0353027e4a Author: Bojan Novković AuthorDate: 2024-09-22 13:31:45 + Commit: Bojan Novković CommitDate: 2024-09-22 14:02:53 + vm: Use VM_ALLOC_NOFREE when allocating bogus_page Allocate the 'bogus_page' page using VM_ALLOC_NOFREE since it never gets released. Differential Revision: https://reviews.freebsd.org/D46699 Reviewed by:alc, markj, kib --- sys/vm/vm_page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index ba32a9eb9e63..40108e8a9b0a 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -194,7 +194,7 @@ vm_page_init(void *dummy) fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); - bogus_page = vm_page_alloc_noobj(VM_ALLOC_WIRED); + bogus_page = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_NOFREE); } static int pgcache_zone_max_pcpu;
git: 51fda658baa3 - main - vmm: Properly handle writes spanning across two pages in vm_handle_db
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=51fda658baa3f80c9778f3a9873fbf67df87119b commit 51fda658baa3f80c9778f3a9873fbf67df87119b Author: Bojan Novković AuthorDate: 2024-09-29 11:10:10 + Commit: Bojan Novković CommitDate: 2024-10-02 16:43:36 + vmm: Properly handle writes spanning across two pages in vm_handle_db The vm_handle_db function is responsible for writing correct status register values into memory when a guest VM is being single-stepped using the RFLAGS.TF mechanism. However, it currently does not properly handle an edge case where the resulting write spans across two pages. This commit fixes this by making vm_handle_db use two vm_copyinfo structs. Security: HYP-09 Reviewed by: markj --- sys/amd64/vmm/vmm.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index a2c2b342bee4..5484d71cefd2 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1795,7 +1795,7 @@ vm_handle_db(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) int error, fault; uint64_t rsp; uint64_t rflags; - struct vm_copyinfo copyinfo; + struct vm_copyinfo copyinfo[2]; *retu = true; if (!vme->u.dbg.pushf_intercept || vme->u.dbg.tf_shadow_val != 0) { @@ -1804,21 +1804,21 @@ vm_handle_db(struct vcpu *vcpu, struct vm_exit *vme, bool *retu) vm_get_register(vcpu, VM_REG_GUEST_RSP, &rsp); error = vm_copy_setup(vcpu, &vme->u.dbg.paging, rsp, sizeof(uint64_t), - VM_PROT_RW, &copyinfo, 1, &fault); + VM_PROT_RW, copyinfo, nitems(copyinfo), &fault); if (error != 0 || fault != 0) { *retu = false; return (EINVAL); } /* Read pushed rflags value from top of stack. */ - vm_copyin(&copyinfo, &rflags, sizeof(uint64_t)); + vm_copyin(copyinfo, &rflags, sizeof(uint64_t)); /* Clear TF bit. */ rflags &= ~(PSL_T); /* Write updated value back to memory. 
*/ - vm_copyout(&rflags, &copyinfo, sizeof(uint64_t)); - vm_copy_teardown(&copyinfo, 1); + vm_copyout(&rflags, copyinfo, sizeof(uint64_t)); + vm_copy_teardown(copyinfo, nitems(copyinfo)); return (0); }
git: b42b18fb24f5 - main - x86: Add definitions for XSAVE state component information
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=b42b18fb24f58d9b3d8b60d6901e582d407521d6 commit b42b18fb24f58d9b3d8b60d6901e582d407521d6 Author: Bojan Novković AuthorDate: 2024-11-26 17:48:46 + Commit: Bojan Novković CommitDate: 2024-11-26 18:16:05 + x86: Add definitions for XSAVE state component information Reviewed by: kib --- sys/x86/include/specialreg.h | 8 1 file changed, 8 insertions(+) diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h index 51c513b6a5ab..9dc30e31e540 100644 --- a/sys/x86/include/specialreg.h +++ b/sys/x86/include/specialreg.h @@ -386,6 +386,14 @@ #define CPUID_EXTSTATE_XINUSE 0x0004 #define CPUID_EXTSTATE_XSAVES 0x0008 +/* + * CPUID instruction 0xd Processor Extended State Enumeration + * Sub-leaf > 1 ecx info + */ +#define CPUID_EXTSTATE_SUPERVISOR 0x0001 +#define CPUID_EXTSTATE_ALIGNED 0x0002 +#define CPUID_EXTSTATE_XFD_SUPPORTED 0x0004 + /* * AMD extended function 8000_0007h ebx info */
git: e17e33f997d6 - main - sdhci: Refactor the generic FDT driver
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=e17e33f997d63107e3a6859cfe3c19eba041b424 commit e17e33f997d63107e3a6859cfe3c19eba041b424 Author: Bojan Novković AuthorDate: 2025-01-18 19:02:51 + Commit: Bojan Novković CommitDate: 2025-02-07 09:06:07 + sdhci: Refactor the generic FDT driver This patch refactors the 'sdhci_fdt.c' driver by moving all vendor specific routines into separate files and making the base 'sdhci_fdt' driver subclassable. The goal is to make adding new FDT-based drivers easier and more maintainable. No functional change intended. Reviewed by:manu, imp Differential Revision: https://reviews.freebsd.org/D48527 --- sys/arm/rockchip/files.rk32xx | 1 + sys/arm64/conf/std.xilinx | 1 + sys/conf/files.arm64 | 3 + sys/dev/sdhci/sdhci_fdt.c | 273 +-- sys/dev/sdhci/sdhci_fdt.h | 66 + sys/dev/sdhci/sdhci_fdt_rockchip.c | 283 + sys/dev/sdhci/sdhci_fdt_xilinx.c | 115 +++ 7 files changed, 507 insertions(+), 235 deletions(-) diff --git a/sys/arm/rockchip/files.rk32xx b/sys/arm/rockchip/files.rk32xx index a9ca6cb1b5ae..7331b12a06ed 100644 --- a/sys/arm/rockchip/files.rk32xx +++ b/sys/arm/rockchip/files.rk32xx @@ -27,3 +27,4 @@ dev/iicbus/pmic/act8846_regulator.c standard dev/iicbus/pmic/fan53555.c standard dev/iicbus/rtc/hym8563.c standard dev/mmc/host/dwmmc_rockchip.c optionaldwmmc +dev/sdhci/sdhci_fdt_rockchip.c optionalsdhci diff --git a/sys/arm64/conf/std.xilinx b/sys/arm64/conf/std.xilinx index 50ebf5ade53b..2283616e8cdf 100644 --- a/sys/arm64/conf/std.xilinx +++ b/sys/arm64/conf/std.xilinx @@ -15,6 +15,7 @@ devicecgem# Cadence GEM Gigabit Ethernet device # MMC/SD/SDIO Card slot support device sdhci +device sdhci_xilinx # IICBUS device cdnc_i2c diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 4b73ebd1e6db..43da6e757b1c 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -436,6 +436,9 @@ dev/sdhci/sdhci_xenon.c optional sdhci_xenon sdhci dev/sdhci/sdhci_xenon_acpi.c optional sdhci_xenon 
sdhci acpi dev/sdhci/sdhci_xenon_fdt.coptional sdhci_xenon sdhci fdt +dev/sdhci/sdhci_fdt_xilinx.c optional sdhci_xilinx sdhci fdt +dev/sdhci/sdhci_fdt_rockchip.c optional sdhci fdt soc_rockchip + dev/sram/mmio_sram.c optional fdt mmio_sram dev/sram/mmio_sram_if.moptional fdt mmio_sram diff --git a/sys/dev/sdhci/sdhci_fdt.c b/sys/dev/sdhci/sdhci_fdt.c index 1d8013ee7088..efc12b54e10f 100644 --- a/sys/dev/sdhci/sdhci_fdt.c +++ b/sys/dev/sdhci/sdhci_fdt.c @@ -47,106 +47,37 @@ #include #include -#include -#include #include #include +#include + +#include +#include #include #include #include #include +#include #include "mmcbr_if.h" #include "sdhci_if.h" #include "opt_mmccam.h" -#include "clkdev_if.h" -#include "syscon_if.h" - -#defineMAX_SLOTS 6 #defineSDHCI_FDT_ARMADA38X 1 #defineSDHCI_FDT_XLNX_ZY7 2 #defineSDHCI_FDT_QUALCOMM 3 -#defineSDHCI_FDT_RK33994 -#defineSDHCI_FDT_RK35685 -#defineSDHCI_FDT_XLNX_ZMP 6 - -#defineRK3399_GRF_EMMCCORE_CON00xf000 -#define RK3399_CORECFG_BASECLKFREQ 0xff00 -#define RK3399_CORECFG_TIMEOUTCLKUNIT (1 << 7) -#define RK3399_CORECFG_TUNINGCOUNT 0x3f -#defineRK3399_GRF_EMMCCORE_CON11 0xf02c -#define RK3399_CORECFG_CLOCKMULTIPLIER 0xff - -#defineRK3568_EMMC_HOST_CTRL 0x0508 -#defineRK3568_EMMC_EMMC_CTRL 0x052c -#defineRK3568_EMMC_ATCTRL 0x0540 -#defineRK3568_EMMC_DLL_CTRL0x0800 -#define DLL_CTRL_SRST 0x0001 -#define DLL_CTRL_START 0x0002 -#define DLL_CTRL_START_POINT_DEFAULT 0x0005 -#define DLL_CTRL_INCREMENT_DEFAULT 0x0200 - -#defineRK3568_EMMC_DLL_RXCLK 0x0804 -#define DLL_RXCLK_DELAY_ENABLE 0x0800 -#define DLL_RXCLK_NO_INV 0x2000 - -#defineRK3568_EMMC_DLL_TXCLK 0x0808 -#define DLL_TXCLK_DELAY_ENABLE 0x0800 -#define DLL_TXCLK_TAPNUM_DEFAULT 0x0008 -#define DLL_TXCLK_TAPN
git: 0c4fa0bdcf87 - main - x86: Add definitions for some Intel Processor Trace bits
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=0c4fa0bdcf87bee66d749c7550da852717522bdf commit 0c4fa0bdcf87bee66d749c7550da852717522bdf Author: Bojan Novković AuthorDate: 2024-12-15 14:03:34 + Commit: Bojan Novković CommitDate: 2024-12-15 15:39:36 + x86: Add definitions for some Intel Processor Trace bits This patch adds definitions for Intel PT-related MSRs and several PT feature bits. Reviewed by: kib, markj Differential Revision: https://reviews.freebsd.org/D46419 --- sys/x86/include/specialreg.h | 9 + 1 file changed, 9 insertions(+) diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h index 9dc30e31e540..e9dde5c3b46a 100644 --- a/sys/x86/include/specialreg.h +++ b/sys/x86/include/specialreg.h @@ -123,6 +123,7 @@ #define XFEATURE_ENABLED_OPMASK 0x0020 #define XFEATURE_ENABLED_ZMM_HI256 0x0040 #define XFEATURE_ENABLED_HI16_ZMM 0x0080 +#define XFEATURE_ENABLED_PT 0x0100 #define XFEATURE_ENABLED_PKRU 0x0200 #define XFEATURE_ENABLED_TILECONFIG 0x0002 #define XFEATURE_ENABLED_TILEDATA 0x0004 @@ -213,6 +214,7 @@ #define CPUPT_MTC (1 << 3) /* MTC Supported */ #define CPUPT_PRW (1 << 4) /* PTWRITE Supported */ #define CPUPT_PWR (1 << 5) /* Power Event Trace Supported */ +#define CPUPT_DIS_TNT (1 << 8) /* TNT disable supported */ /* Leaf 0 ecx. 
*/ #define CPUPT_TOPA (1 << 0) /* ToPA Output Supported */ @@ -654,6 +656,12 @@ #define MSR_PAT 0x277 #define MSR_MC0_CTL2 0x280 #define MSR_MTRRdefType 0x2ff +#define MSR_IA_GLOBAL_STATUS 0x38E +#define MSR_IA_GLOBAL_CTRL 0x38F +#define MSR_IA_GLOBAL_OVF_CTRL 0x390 +#define MSR_IA_GLOBAL_STATUS_RESET 0x390 +#define MSR_IA_GLOBAL_STATUS_SET 0x391 +#define GLOBAL_STATUS_FLAG_TRACETOPAPMI (1ULL << 55) #define MSR_MC0_CTL 0x400 #define MSR_MC0_STATUS 0x401 #define MSR_MC0_ADDR 0x402 @@ -781,6 +789,7 @@ #define RTIT_CTL_ADDR2_CFG_M (0xfULL << RTIT_CTL_ADDR2_CFG_S) #define RTIT_CTL_ADDR3_CFG_S 44 #define RTIT_CTL_ADDR3_CFG_M (0xfULL << RTIT_CTL_ADDR3_CFG_S) +#define RTIT_CTL_DIS_TNT (1ULL << 55) #define MSR_IA32_RTIT_STATUS 0x571 /* Tracing Status Register (R/W) */ #define RTIT_STATUS_FILTEREN (1 << 0) #define RTIT_STATUS_CONTEXTEN (1 << 1)
git: 04e832672159 - main - x86: Allow sharing of performance counter interrupts
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=04e832672159cae412e8984e0b0cabfa6e7428b7 commit 04e832672159cae412e8984e0b0cabfa6e7428b7 Author: Bojan Novković AuthorDate: 2024-12-15 14:00:19 + Commit: Bojan Novković CommitDate: 2024-12-15 15:39:36 + x86: Allow sharing of performance counter interrupts This patch refactors the Performance Counter interrupt setup code to allow sharing the interrupt line between multiple drivers. More specifically, Performance Counter interrupts are used by both hwpmc(4) and hwt(4)'s upcoming Intel Processor Trace backend. Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D46420 --- sys/dev/hwpmc/hwpmc_core.c | 4 ++-- sys/dev/hwpmc/hwpmc_x86.c | 4 ++-- sys/x86/include/apicvar.h | 6 +++--- sys/x86/x86/local_apic.c | 33 ++--- 4 files changed, 21 insertions(+), 26 deletions(-) diff --git a/sys/dev/hwpmc/hwpmc_core.c b/sys/dev/hwpmc/hwpmc_core.c index bf224ded126f..83784b93718e 100644 --- a/sys/dev/hwpmc/hwpmc_core.c +++ b/sys/dev/hwpmc/hwpmc_core.c @@ -1051,7 +1051,7 @@ core_intr(struct trapframe *tf) counter_u64_add(pmc_stats.pm_intr_ignored, 1); if (found_interrupt) - lapic_reenable_pmc(); + lapic_reenable_pcint(); return (found_interrupt); } @@ -1150,7 +1150,7 @@ core2_intr(struct trapframe *tf) counter_u64_add(pmc_stats.pm_intr_ignored, 1); if (found_interrupt) - lapic_reenable_pmc(); + lapic_reenable_pcint(); /* * Reenable all non-stalled PMCs. 
diff --git a/sys/dev/hwpmc/hwpmc_x86.c b/sys/dev/hwpmc/hwpmc_x86.c index 1d04a6610674..54cc919eec30 100644 --- a/sys/dev/hwpmc/hwpmc_x86.c +++ b/sys/dev/hwpmc/hwpmc_x86.c @@ -242,7 +242,7 @@ pmc_md_initialize(void) return (NULL); /* disallow sampling if we do not have an LAPIC */ - if (md != NULL && !lapic_enable_pmc()) + if (md != NULL && !lapic_enable_pcint()) for (i = 0; i < md->pmd_nclass; i++) { if (i == PMC_CLASS_INDEX_SOFT) continue; @@ -256,7 +256,7 @@ void pmc_md_finalize(struct pmc_mdep *md) { - lapic_disable_pmc(); + lapic_disable_pcint(); if (cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_HYGON) pmc_amd_finalize(md); diff --git a/sys/x86/include/apicvar.h b/sys/x86/include/apicvar.h index fc9bb0123539..c537d0ee0cdd 100644 --- a/sys/x86/include/apicvar.h +++ b/sys/x86/include/apicvar.h @@ -231,9 +231,9 @@ voidapic_enable_vector(u_int apic_id, u_int vector); void apic_disable_vector(u_int apic_id, u_int vector); void apic_free_vector(u_int apic_id, u_int vector, u_int irq); void lapic_calibrate_timer(void); -intlapic_enable_pmc(void); -void lapic_disable_pmc(void); -void lapic_reenable_pmc(void); +intlapic_enable_pcint(void); +void lapic_disable_pcint(void); +void lapic_reenable_pcint(void); void lapic_enable_cmc(void); intlapic_enable_mca_elvt(void); void lapic_ipi_raw(register_t icrlo, u_int dest); diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 6a913883cc5c..86cbe9a050dc 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -35,7 +35,6 @@ #include #include "opt_atpic.h" -#include "opt_hwpmc_hooks.h" #include "opt_ddb.h" @@ -50,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -206,6 +206,7 @@ static uint64_t lapic_ipi_wait_mult; static int __read_mostly lapic_ds_idle_timeout = 100; #endif unsigned int max_apic_id; +static int pcint_refcnt = 0; SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "APIC options"); @@ -809,20 +810,19 @@ lapic_intrcnt(void 
*dummy __unused) SYSINIT(lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, lapic_intrcnt, NULL); void -lapic_reenable_pmc(void) +lapic_reenable_pcint(void) { -#ifdef HWPMC_HOOKS uint32_t value; + if (refcount_load(&pcint_refcnt) == 0) + return; value = lapic_read32(LAPIC_LVT_PCINT); value &= ~APIC_LVT_M; lapic_write32(LAPIC_LVT_PCINT, value); -#endif } -#ifdef HWPMC_HOOKS static void -lapic_update_pmc(void *dummy) +lapic_update_pcint(void *dummy) { struct lapic *la; @@ -830,7 +830,6 @@ lapic_update_pmc(void *dummy) lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, lapic_read32(LAPIC_LVT_PCINT))); } -#endif void lapic_calibrate_timer(void) @@ -858,9 +857,8 @@ lapic_calibrate_timer(void) } int -lapic_enable_pmc(void) +lapic_enable_pcint(void) { -#ifdef HWPMC_HOOKS u_int32_t maxlvt; #ifdef DEV_ATPIC @@ -873,21 +871,18 @@ lapic_enable_pmc(void)
git: d5ce54dddf49 - main - hwpmc_x86: Register interrupt handler using the dynamic NMI registration interface
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=d5ce54dddf4927a2edd3e57ee67722dfccb567a8 commit d5ce54dddf4927a2edd3e57ee67722dfccb567a8 Author: Bojan Novković AuthorDate: 2024-12-15 14:06:58 + Commit: Bojan Novković CommitDate: 2024-12-15 15:39:36 + hwpmc_x86: Register interrupt handler using the dynamic NMI registration interface Register the PCINT handler using the nmi_{register, remove}_handler interfaces (introduced in D46421) in preparation for hwt(4)'s Intel Processor Trace backend. No functional change intended. Reviewed by:kib Differential Revision: https://reviews.freebsd.org/D47989 --- sys/dev/hwpmc/hwpmc_x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/dev/hwpmc/hwpmc_x86.c b/sys/dev/hwpmc/hwpmc_x86.c index 54cc919eec30..2c6c4cd148bf 100644 --- a/sys/dev/hwpmc/hwpmc_x86.c +++ b/sys/dev/hwpmc/hwpmc_x86.c @@ -248,6 +248,7 @@ pmc_md_initialize(void) continue; md->pmd_classdep[i].pcd_caps &= ~PMC_CAP_INTERRUPT; } + nmi_register_handler(md->pmd_intr); return (md); } @@ -257,6 +258,7 @@ pmc_md_finalize(struct pmc_mdep *md) { lapic_disable_pcint(); + nmi_remove_handler(md->pmd_intr); if (cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_HYGON) pmc_amd_finalize(md);
git: 593e874e6124 - main - amd64: Add wrappers for XRSTORS and XSAVES
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=593e874e61249c845ca83e9284e4d9061643e8fb commit 593e874e61249c845ca83e9284e4d9061643e8fb Author: Bojan Novković AuthorDate: 2024-12-15 14:02:13 + Commit: Bojan Novković CommitDate: 2024-12-15 15:39:36 + amd64: Add wrappers for XRSTORS and XSAVES Reviewed by:kib Differential Revision: https://reviews.freebsd.org/D46984 --- sys/amd64/include/cpufunc.h | 23 +++ 1 file changed, 23 insertions(+) diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index ca53d73b0186..d180f5c76afb 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -942,6 +942,29 @@ sgx_eremove(void *epc) return (sgx_encls(SGX_EREMOVE, 0, (uint64_t)epc, 0)); } +static __inline void +xrstors(uint8_t *save_area, uint64_t state_bitmap) +{ + uint32_t low, hi; + + low = state_bitmap; + hi = state_bitmap >> 32; + __asm __volatile("xrstors %0" : : "m"(*save_area), "a"(low), + "d"(hi)); +} + +static __inline void +xsaves(uint8_t *save_area, uint64_t state_bitmap) +{ + uint32_t low, hi; + + low = state_bitmap; + hi = state_bitmap >> 32; + __asm __volatile("xsaves %0" : "=m"(*save_area) : "a"(low), + "d"(hi) + : "memory"); +} + void reset_dbregs(void); #ifdef _KERNEL
git: 459dc427873c - main - x86: Refactor kernel-mode NMI handling
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=459dc427873c9a294387ec74a96e6f7824de7435 commit 459dc427873c9a294387ec74a96e6f7824de7435 Author: Bojan Novković AuthorDate: 2024-12-15 13:56:40 + Commit: Bojan Novković CommitDate: 2024-12-15 15:39:36 + x86: Refactor kernel-mode NMI handling This refactor aims to add the ability to share performance counter interrupts by refactoring the kernel-mode NMI handler. The handler now allows multiple drivers to service the same interrupt (e.g. hwpmc(4) and hwt(4)'s Intel Processor Trace backend). Reviewed by:kib, avg Differential Revision: https://reviews.freebsd.org/D46421 --- sys/amd64/amd64/trap.c| 32 ++-- sys/i386/i386/trap.c | 26 +++-- sys/x86/include/x86_var.h | 4 +- sys/x86/x86/cpu_machdep.c | 95 +-- 4 files changed, 104 insertions(+), 53 deletions(-) diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 6ceeea41ea91..4590be501d64 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -230,38 +230,22 @@ trap(struct trapframe *frame) VM_CNT_INC(v_trap); type = frame->tf_trapno; -#ifdef SMP - /* Handler for NMI IPIs used for stopping CPUs. */ - if (type == T_NMI && ipi_nmi_handler() == 0) - return; -#endif - #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif + if (type == T_NMI) { + nmi_handle_intr(frame); + return; + } if (type == T_RESERVED) { trap_fatal(frame, 0); return; } - if (type == T_NMI) { -#ifdef HWPMC_HOOKS - /* -* CPU PMCs interrupt using an NMI. If the PMC module is -* active, pass the 'rip' value to the PMC module's interrupt -* handler. A non-zero return value from the handler means that -* the NMI was consumed by it and we can return immediately. 
-*/ - if (pmc_intr != NULL && - (*pmc_intr)(frame) != 0) - return; -#endif - } - if ((frame->tf_rflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled @@ -392,10 +376,6 @@ trap(struct trapframe *frame) signo = SIGFPE; break; - case T_NMI: - nmi_handle_intr(type, frame); - return; - case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; @@ -619,10 +599,6 @@ trap(struct trapframe *frame) return; #endif break; - - case T_NMI: - nmi_handle_intr(type, frame); - return; } trap_fatal(frame, 0); diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 693e3a2f94b4..9e310c049daa 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -237,12 +237,6 @@ trap(struct trapframe *frame) KASSERT((read_eflags() & PSL_I) == 0, ("trap: interrupts enabled, type %d frame %p", type, frame)); -#ifdef SMP - /* Handler for NMI IPIs used for stopping CPUs. */ - if (type == T_NMI && ipi_nmi_handler() == 0) - return; -#endif /* SMP */ - #ifdef KDB if (kdb_active) { kdb_reenter(); @@ -251,24 +245,14 @@ trap(struct trapframe *frame) #endif trap_check_kstack(); - if (type == T_RESERVED) { - trap_fatal(frame, 0); + if (type == T_NMI) { + nmi_handle_intr(frame); return; } - if (type == T_NMI) { -#ifdef HWPMC_HOOKS - /* -* CPU PMCs interrupt using an NMI so we check for that first. -* If the HWPMC module is active, 'pmc_hook' will point to -* the function to be called. A non-zero return value from the -* hook means that the NMI was consumed by it and that we can -* return immediately. -*/ - if (pmc_intr != NULL && - (*pmc_intr)(frame) != 0) - return; -#endif + if (type == T_RESERVED) { + trap_fatal(frame, 0); + return; } if (type == T_MCHK) { diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h index 6609871bf89e..dbb4e9557ed0 100644 --- a/sys/x86/include/x86_var.h +++ b/sys/x86/include/x86_var.h @@ -148,7 +148,9 @@ voidzenbleed_sanitize_enable(void); void zenbleed_check_and_app
git: 7bcaff05223e - main - x86: Add routines for querying XSAVE feature information
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=7bcaff05223eb81611372e341a120391925fa724 commit 7bcaff05223eb81611372e341a120391925fa724 Author: Bojan Novković AuthorDate: 2024-12-15 14:04:58 + Commit: Bojan Novković CommitDate: 2024-12-15 15:39:36 + x86: Add routines for querying XSAVE feature information This patch adds several routines that track and expose information about various XSAVE-related features. More specifically, it adds the ability to check whether a given XFEATURE is supported and which XSAVE extensions are supported. Furthermore, it adds several routines for calculating the size and offsets within a save area given a XSAVE feature bitmap. Reviewed by:kib Differential Revision: https://reviews.freebsd.org/D47394 --- sys/amd64/amd64/fpu.c | 111 +- sys/x86/include/fpu.h | 6 +++ 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 58a135e827a8..591bd196ca7d 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -164,12 +164,14 @@ SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ +static uint64_t xsave_extensions; static uma_zone_t fpu_save_area_zone; static struct savefpu *fpu_initialstate; static struct xsave_area_elm_descr { u_int offset; u_int size; + u_int flags; } *xsave_area_desc; static void @@ -452,6 +454,9 @@ fpuinitstate(void *arg __unused) * Region of an XSAVE Area" for the source of offsets/sizes. 
*/ if (use_xsave) { + cpuid_count(0xd, 1, cp); + xsave_extensions = cp[0]; + xstate_bv = (uint64_t *)((char *)(fpu_initialstate + 1) + offsetof(struct xstate_hdr, xstate_bv)); *xstate_bv = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; @@ -465,8 +470,9 @@ fpuinitstate(void *arg __unused) for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); - xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; + xsave_area_desc[i].offset = cp[1]; + xsave_area_desc[i].flags = cp[2]; } } @@ -1285,3 +1291,106 @@ fpu_save_area_reset(struct savefpu *fsa) bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size); } + +static __inline void +xsave_extfeature_check(uint64_t feature) +{ + + KASSERT((feature & (feature - 1)) == 0, + ("%s: invalid XFEATURE 0x%lx", __func__, feature)); + KASSERT(feature < flsl(xsave_mask), + ("%s: unsupported XFEATURE 0x%lx", __func__, feature)); +} + +static __inline void +xsave_extstate_bv_check(uint64_t xstate_bv) +{ + KASSERT(xstate_bv != 0 && ilog2(xstate_bv) < flsl(xsave_mask), + ("%s: invalid XSTATE_BV 0x%lx", __func__, xstate_bv)); +} + +/* + * Returns whether the XFEATURE 'feature' is supported as a user state + * or supervisor state component. + */ +bool +xsave_extfeature_supported(uint64_t feature, bool supervisor) +{ + int idx; + + KASSERT(use_xsave, ("%s: XSAVE not supported", __func__)); + xsave_extfeature_check(feature); + + if ((xsave_mask & feature) == 0) + return (false); + idx = ilog2(feature); + return (((xsave_area_desc[idx].flags & CPUID_EXTSTATE_SUPERVISOR) != 0) == + supervisor); +} + +/* + * Returns whether the given XSAVE extension is supported. + */ +bool +xsave_extension_supported(uint64_t extension) +{ + KASSERT(use_xsave, ("%s: XSAVE not supported", __func__)); + + return ((xsave_extensions & extension) != 0); +} + +/* + * Returns offset for XFEATURE 'feature' given the requested feature bitmap + * 'xstate_bv', and extended region format ('compact'). 
+ */ +size_t +xsave_area_offset(uint64_t xstate_bv, uint64_t feature, +bool compact) +{ + int i, idx; + size_t offs; + struct xsave_area_elm_descr *xep; + + KASSERT(use_xsave, ("%s: XSAVE not supported", __func__)); + xsave_extstate_bv_check(xstate_bv); + xsave_extfeature_check(feature); + + idx = ilog2(feature); + if (!compact) + return (xsave_area_desc[idx].offset); + offs = sizeof(struct savefpu) + sizeof(struct xstate_hdr); + xstate_bv &= ~(XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE); + while ((i = ffs(xstate_bv) - 1) > 0 && i < idx) { + xep = &xsave_area_desc[i]; + if ((xep->flags & CPUID_EXTSTATE_ALIGNED) != 0) +
git: 3342e5967dc7 - main - i386: Fix incorrect NMI handler invocations
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=3342e5967dc7193d97f99a92b81824db81efe2f1 commit 3342e5967dc7193d97f99a92b81824db81efe2f1 Author: Bojan Novković AuthorDate: 2024-12-15 17:44:34 + Commit: Bojan Novković CommitDate: 2024-12-15 17:47:52 + i386: Fix incorrect NMI handler invocations Fixes: 459dc42 --- sys/i386/i386/trap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 9e310c049daa..a8b7df42a283 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -428,7 +428,7 @@ user_trctrap_out: } return; #else /* !POWERFAIL_NMI */ - nmi_handle_intr(type, frame); + nmi_handle_intr(frame); return; #endif /* POWERFAIL_NMI */ @@ -685,7 +685,7 @@ kernel_trctrap: } return; #else /* !POWERFAIL_NMI */ - nmi_handle_intr(type, frame); + nmi_handle_intr(frame); return; #endif /* POWERFAIL_NMI */ }
git: b9951017bab3 - main - amd64/fpu: Track supervisor state XSAVE components
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=b9951017bab396e24042e85632e2cc34ee0329ff commit b9951017bab396e24042e85632e2cc34ee0329ff Author: Bojan Novković AuthorDate: 2025-01-15 16:41:24 + Commit: Bojan Novković CommitDate: 2025-01-22 12:58:34 + amd64/fpu: Track supervisor state XSAVE components The amd64/fpu.c xsave_* routines track supported XSAVE components and features. However, they only track supported user state components, and there is currently no way for a consumer to check whether the CPU supports a supervisor state component. Fix this by saving the supported supervisor state components, enumerated by CPUID function 0DH, sub-function 1, in a separate mask. Reviewed by:kib Differential Revision: https://reviews.freebsd.org/D48466 --- sys/amd64/amd64/fpu.c | 38 -- sys/x86/include/fpu.h | 5 +++-- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 591bd196ca7d..79d1722268b7 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -164,6 +164,7 @@ SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ +static uint64_t xsave_mask_supervisor; static uint64_t xsave_extensions; static uma_zone_t fpu_save_area_zone; static struct savefpu *fpu_initialstate; @@ -324,6 +325,7 @@ fpuinit_bsp1(void) ctx_switch_xsave[3] |= 0x10; restore_wp(old_wp); } + xsave_mask_supervisor = ((uint64_t)cp[3] << 32) | cp[2]; } /* @@ -421,7 +423,7 @@ fpuinitstate(void *arg __unused) XSAVE_AREA_ALIGN - 1, 0); fpu_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO); if (use_xsave) { - max_ext_n = flsl(xsave_mask); + max_ext_n = flsl(xsave_mask | xsave_mask_supervisor); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); } @@ -1293,19 +1295,25 @@ fpu_save_area_reset(struct savefpu *fsa) } static __inline void 
-xsave_extfeature_check(uint64_t feature) +xsave_extfeature_check(uint64_t feature, bool supervisor) { + uint64_t mask; + mask = supervisor ? xsave_mask_supervisor : xsave_mask; KASSERT((feature & (feature - 1)) == 0, ("%s: invalid XFEATURE 0x%lx", __func__, feature)); - KASSERT(feature < flsl(xsave_mask), - ("%s: unsupported XFEATURE 0x%lx", __func__, feature)); + KASSERT(ilog2(feature) <= ilog2(mask), + ("%s: unsupported %s XFEATURE 0x%lx", __func__, + supervisor ? "supervisor" : "user", feature)); } static __inline void -xsave_extstate_bv_check(uint64_t xstate_bv) +xsave_extstate_bv_check(uint64_t xstate_bv, bool supervisor) { - KASSERT(xstate_bv != 0 && ilog2(xstate_bv) < flsl(xsave_mask), + uint64_t mask; + + mask = supervisor ? xsave_mask_supervisor : xsave_mask; + KASSERT(xstate_bv != 0 && ilog2(xstate_bv) <= ilog2(mask), ("%s: invalid XSTATE_BV 0x%lx", __func__, xstate_bv)); } @@ -1317,11 +1325,13 @@ bool xsave_extfeature_supported(uint64_t feature, bool supervisor) { int idx; + uint64_t mask; KASSERT(use_xsave, ("%s: XSAVE not supported", __func__)); - xsave_extfeature_check(feature); + xsave_extfeature_check(feature, supervisor); - if ((xsave_mask & feature) == 0) + mask = supervisor ? 
xsave_mask_supervisor : xsave_mask; + if ((mask & feature) == 0) return (false); idx = ilog2(feature); return (((xsave_area_desc[idx].flags & CPUID_EXTSTATE_SUPERVISOR) != 0) == @@ -1345,15 +1355,15 @@ xsave_extension_supported(uint64_t extension) */ size_t xsave_area_offset(uint64_t xstate_bv, uint64_t feature, -bool compact) +bool compact, bool supervisor) { int i, idx; size_t offs; struct xsave_area_elm_descr *xep; KASSERT(use_xsave, ("%s: XSAVE not supported", __func__)); - xsave_extstate_bv_check(xstate_bv); - xsave_extfeature_check(feature); + xsave_extstate_bv_check(xstate_bv, supervisor); + xsave_extfeature_check(feature, supervisor); idx = ilog2(feature); if (!compact) @@ -1376,16 +1386,16 @@ xsave_area_offset(uint64_t xstate_bv, uint64_t feature, * 'xstate_bv' and extended region format ('compact'). */ size_t -xsave_area_size(uint64_t xstate_bv, bool compact) +xsave_area_size(uint64_t xstate_bv, bool compact, bool supervisor) { int last_idx; KASSERT(use_xsave, ("%s: XSAVE not suppo
git: f0d036cf6655 - main - amd64/fpu: Fix build for NODEBUG kernels
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=f0d036cf665520cead4970b4337d72b077ed5eea commit f0d036cf665520cead4970b4337d72b077ed5eea Author: Bojan Novković AuthorDate: 2025-01-22 15:02:17 + Commit: Bojan Novković CommitDate: 2025-01-22 15:15:32 + amd64/fpu: Fix build for NODEBUG kernels Fixes: b995101 Reported by: Michael Butler (i...@protected-networks.net) --- sys/amd64/amd64/fpu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 79d1722268b7..48bfaa53c7b4 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -1297,6 +1297,7 @@ fpu_save_area_reset(struct savefpu *fsa) static __inline void xsave_extfeature_check(uint64_t feature, bool supervisor) { +#ifdef INVARIANTS uint64_t mask; mask = supervisor ? xsave_mask_supervisor : xsave_mask; @@ -1305,16 +1306,19 @@ xsave_extfeature_check(uint64_t feature, bool supervisor) KASSERT(ilog2(feature) <= ilog2(mask), ("%s: unsupported %s XFEATURE 0x%lx", __func__, supervisor ? "supervisor" : "user", feature)); +#endif } static __inline void xsave_extstate_bv_check(uint64_t xstate_bv, bool supervisor) { +#ifdef INVARIANTS uint64_t mask; mask = supervisor ? xsave_mask_supervisor : xsave_mask; KASSERT(xstate_bv != 0 && ilog2(xstate_bv) <= ilog2(mask), ("%s: invalid XSTATE_BV 0x%lx", __func__, xstate_bv)); +#endif } /*
git: 19f202f859b1 - main - sdhci: Fixes for sdhci_fdt_rockchip.c
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=19f202f859b101507e26fe181aaf9f0b99f4d59c commit 19f202f859b101507e26fe181aaf9f0b99f4d59c Author: Bojan Novković AuthorDate: 2025-02-16 10:41:43 + Commit: Bojan Novković CommitDate: 2025-02-16 10:41:43 + sdhci: Fixes for sdhci_fdt_rockchip.c This change fixes a couple of issues in the Rockchip SDHCI driver: - Fix a panic caused by sdhci_fdt_rockchip_attach not populating the softc's dev variable before initializing clocks - Fix a bug where sdhci_fdt_rockchip_set_clock fails to call sdhci_fdt_set_clock Fixes: e17e33f997d6 Reported by: Alonso Cárdenas Márquez (acarde...@bsd-peru.org) --- sys/dev/sdhci/sdhci_fdt_rockchip.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sys/dev/sdhci/sdhci_fdt_rockchip.c b/sys/dev/sdhci/sdhci_fdt_rockchip.c index b3311d3e8a48..44a5e2ffe271 100644 --- a/sys/dev/sdhci/sdhci_fdt_rockchip.c +++ b/sys/dev/sdhci/sdhci_fdt_rockchip.c @@ -217,7 +217,7 @@ sdhci_fdt_rockchip_set_clock(device_t dev, struct sdhci_slot *slot, int clock) DLL_STRBIN_TAPNUM_FROM_SW); } } - return (sdhci_fdt_rockchip_set_clock(dev, slot, clock)); + return (sdhci_fdt_set_clock(dev, slot, clock)); } static int @@ -226,6 +226,7 @@ sdhci_fdt_rockchip_attach(device_t dev) struct sdhci_fdt_softc *sc = device_get_softc(dev); int err, compat; + sc->dev = dev; compat = ofw_bus_search_compatible(dev, compat_data)->ocd_data; switch (compat) { case SDHCI_FDT_RK3399: @@ -243,12 +244,10 @@ sdhci_fdt_rockchip_attach(device_t dev) device_printf(dev, "Cannot get syscon handle\n"); return (err); } - if (compat == SDHCI_FDT_RK3399) { - err = sdhci_init_rk3399(dev); - if (err != 0) { - device_printf(dev, "Cannot init RK3399 SDHCI\n"); - return (err); - } + err = sdhci_init_rk3399(dev); + if (err != 0) { + device_printf(dev, "Cannot init RK3399 SDHCI\n"); + return (err); } break; case SDHCI_FDT_RK3568:
git: df436036e82b - main - umtx: Don't sleep after casueword32 failure in do_sem2_wake
The branch main has been updated by bnovkov: URL: https://cgit.FreeBSD.org/src/commit/?id=df436036e82b895a6233d803bc8bf14d2cfe90d7 commit df436036e82b895a6233d803bc8bf14d2cfe90d7 Author: Bojan Novković AuthorDate: 2025-01-30 15:10:04 + Commit: Bojan Novković CommitDate: 2025-02-17 16:40:34 + umtx: Don't sleep after casueword32 failure in do_sem2_wake When a casueword32 operation fails, 'do_sem2_wake' will call 'thread_check_susp' to avoid a potential livelock. However, it instructs 'thread_check_susp' to sleep while holding a previously busied umtxq key. This is explicitly discouraged by the comments in 'thread_check_susp' which state that a thread shouldn't sleep if it owns a kernel resource. Fix this by passing 'false' to 'thread_check_susp'. Reviewed by: kib PR: 282713 Differential Revision: https://reviews.freebsd.org/D48728 Sponsored by: Klara Inc. --- sys/kern/kern_umtx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c index a9294c324cb4..938dcf2ff1cb 100644 --- a/sys/kern/kern_umtx.c +++ b/sys/kern/kern_umtx.c @@ -3789,7 +3789,7 @@ do_sem2_wake(struct thread *td, struct _usem2 *sem) rv = casueword32(&sem->_count, count, &count, count & ~USEM_HAS_WAITERS); if (rv == 1) { - rv = thread_check_susp(td, true); + rv = thread_check_susp(td, false); if (rv != 0) break; }