git: 82f5dfc12139 - main - db_pprint: Fix offset calculation for struct members

2024-07-21 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=82f5dfc121391604b079ea96aa14ea71e6b618c9

commit 82f5dfc121391604b079ea96aa14ea71e6b618c9
Author: Bojan Novković 
AuthorDate: 2024-07-21 16:45:33 +
Commit: Bojan Novković 
CommitDate: 2024-07-21 17:31:48 +

db_pprint: Fix offset calculation for struct members

The struct pretty-printing code uses the ctm_offset field in
struct ctf_member_v3 to calculate the address of a struct member.
However, the code treats this as a byte offset rather than the
offset in bits, leading to wrong values being printed.
Fix this by dividing ctm_offset by NBBY.

Approved by: markj (mentor)
Fixes: c21bc6f3c242
---
 sys/ddb/db_pprint.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/ddb/db_pprint.c b/sys/ddb/db_pprint.c
index 10334ce650c8..2925caedd49d 100644
--- a/sys/ddb/db_pprint.c
+++ b/sys/ddb/db_pprint.c
@@ -117,7 +117,7 @@ db_pprint_struct(db_addr_t addr, struct ctf_type_v3 *type, 
u_int depth)
return;
}
mtype = db_ctf_typeid_to_type(&sym_data, mp->ctm_type);
-   maddr = addr + mp->ctm_offset;
+   maddr = addr + (mp->ctm_offset / NBBY);
mname = db_ctf_stroff_to_str(&sym_data, mp->ctm_name);
db_indent = depth;
if (mname != NULL) {
@@ -140,7 +140,7 @@ db_pprint_struct(db_addr_t addr, struct ctf_type_v3 *type, 
u_int depth)
return;
}
mtype = db_ctf_typeid_to_type(&sym_data, mp->ctlm_type);
-   maddr = addr + CTF_LMEM_OFFSET(mp);
+   maddr = addr + (CTF_LMEM_OFFSET(mp) / NBBY);
mname = db_ctf_stroff_to_str(&sym_data, mp->ctlm_name);
db_indent = depth;
if (mname != NULL) {



git: 1cbd613f3343 - main - db_pprint: Properly handle complex pointer types

2024-07-21 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=1cbd613f3343c873ace8a56df2e515626a18ef22

commit 1cbd613f3343c873ace8a56df2e515626a18ef22
Author: Bojan Novković 
AuthorDate: 2024-07-21 16:51:22 +
Commit: Bojan Novković 
CommitDate: 2024-07-21 17:31:59 +

db_pprint: Properly handle complex pointer types

The existing pretty-printing code fails to properly print complex
pointer types. This commit fixes this behaviour by traversing the
chain of CTF types until a base type is encountered.

Approved by: markj (mentor)
Fixes: c21bc6f3c242
---
 sys/ddb/db_pprint.c | 38 ++
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/sys/ddb/db_pprint.c b/sys/ddb/db_pprint.c
index 2925caedd49d..0ca2b0bb952c 100644
--- a/sys/ddb/db_pprint.c
+++ b/sys/ddb/db_pprint.c
@@ -45,6 +45,7 @@ static void db_pprint_type(db_addr_t addr, struct ctf_type_v3 
*type,
 
 static u_int max_depth = DB_PPRINT_DEFAULT_DEPTH;
 static struct db_ctf_sym_data sym_data;
+static const char *asteriskstr = "*";
 
 /*
  * Pretty-prints a CTF_INT type.
@@ -248,9 +249,14 @@ db_pprint_ptr(db_addr_t addr, struct ctf_type_v3 *type, 
u_int depth)
const char *qual = "";
const char *name;
db_addr_t val;
+   uint32_t tid;
u_int kind;
+   int ptrcnt;
 
-   ref_type = db_ctf_typeid_to_type(&sym_data, type->ctt_type);
+   ptrcnt = 1;
+   tid = type->ctt_type;
+again:
+   ref_type = db_ctf_typeid_to_type(&sym_data, tid);
kind = CTF_V3_INFO_KIND(ref_type->ctt_info);
switch (kind) {
case CTF_K_STRUCT:
@@ -258,25 +264,41 @@ db_pprint_ptr(db_addr_t addr, struct ctf_type_v3 *type, 
u_int depth)
break;
case CTF_K_VOLATILE:
qual = "volatile ";
-   break;
+   tid = ref_type->ctt_type;
+   goto again;
case CTF_K_CONST:
qual = "const ";
-   break;
+   tid = ref_type->ctt_type;
+   goto again;
+   case CTF_K_RESTRICT:
+   qual = "restrict ";
+   tid = ref_type->ctt_type;
+   goto again;
+   case CTF_K_POINTER:
+   ptrcnt++;
+   tid = ref_type->ctt_type;
+   goto again;
+   case CTF_K_TYPEDEF:
+   tid = ref_type->ctt_type;
+   goto again;
default:
break;
}
 
-   val = db_get_value(addr, sizeof(db_addr_t), false);
-   if (depth < max_depth) {
+   ptrcnt = min(ptrcnt, strlen(asteriskstr));
+   val = (addr != 0) ? db_get_value(addr, sizeof(db_addr_t), false) : 0;
+   if (depth < max_depth && (val != 0)) {
/* Print contents of memory pointed to by this pointer. */
-   db_pprint_type(addr, ref_type, depth + 1);
+   db_pprint_type(val, ref_type, depth + 1);
} else {
name = db_ctf_stroff_to_str(&sym_data, ref_type->ctt_name);
db_indent = depth;
if (name != NULL)
-   db_printf("(%s%s *) 0x%lx", qual, name, (long)val);
+   db_printf("(%s%s %.*s) 0x%lx", qual, name, ptrcnt,
+   asteriskstr, (long)val);
else
-   db_printf("(%s *) 0x%lx", qual, (long)val);
+   db_printf("(%s %.*s) 0x%lx", qual, ptrcnt, asteriskstr,
+   (long)val);
}
 }
 



git: 78f3e0f6b3ad - main - malloc(9): Introduce M_NEVERFREED

2024-07-30 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=78f3e0f6b3ad70d9574730fc3338474376ef8ebd

commit 78f3e0f6b3ad70d9574730fc3338474376ef8ebd
Author: Bojan Novković 
AuthorDate: 2024-05-03 18:18:56 +
Commit: Bojan Novković 
CommitDate: 2024-07-30 15:38:18 +

malloc(9): Introduce M_NEVERFREED

This patch adds an additional malloc(9) flag to distinguish allocations
that are never freed during runtime.

Differential Revision:  https://reviews.freebsd.org/D45045
Reviewed by:alc, kib, markj
Tested by:  alc
---
 sys/sys/malloc.h  | 3 ++-
 sys/vm/uma_core.c | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
index dfd7928fc258..9b281da4b4d4 100644
--- a/sys/sys/malloc.h
+++ b/sys/sys/malloc.h
@@ -60,8 +60,9 @@
 #defineM_BESTFIT   0x2000  /* only for vmem, low 
fragmentation */
 #defineM_EXEC  0x4000  /* allocate executable space */
 #defineM_NEXTFIT   0x8000  /* only for vmem, follow cursor 
*/
+#defineM_NEVERFREED0x1 /* chunk will never get freed */
 
-#defineM_VERSION   2020110501
+#defineM_VERSION   2024073001
 
 /*
  * Two malloc type structures are present: malloc_type, which is used by a
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 516ac2c2965a..e93c561d759a 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -1791,6 +1791,9 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int 
domain, int flags,
if (keg->uk_flags & UMA_ZONE_NODUMP)
aflags |= M_NODUMP;
 
+   if (keg->uk_flags & UMA_ZONE_NOFREE)
+   aflags |= M_NEVERFREED;
+
/* zone is passed for legacy reasons. */
size = keg->uk_ppera * PAGE_SIZE;
mem = keg->uk_allocf(zone, size, domain, &sflags, aflags);



git: 92b9138991dd - main - vm: Introduce VM_ALLOC_NOFREE and PG_NOFREE

2024-07-30 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=92b9138991dd2829ac744592cb9f9f3415be146c

commit 92b9138991dd2829ac744592cb9f9f3415be146c
Author: Bojan Novković 
AuthorDate: 2024-07-14 13:13:56 +
Commit: Bojan Novković 
CommitDate: 2024-07-30 15:38:24 +

vm: Introduce VM_ALLOC_NOFREE and PG_NOFREE

This patch adds two additional vm_page flags to distinguish pages that
never get released while the system is running (e.g. UMA_ZONE_NOFREE slabs).

Differential Revision:  https://reviews.freebsd.org/D45970
Reviewed by:alc, kib, markj
Tested by:  alc
---
 sys/vm/vm_page.c | 13 ++---
 sys/vm/vm_page.h |  5 -
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 64413ba10bfa..3b6b88e4eb32 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2082,7 +2082,8 @@ vm_page_alloc_domain_after(vm_object_t object, 
vm_pindex_t pindex, int domain,
 #defineVPA_FLAGS   (VM_ALLOC_CLASS_MASK | VM_ALLOC_WAITFAIL |  
\
 VM_ALLOC_NOWAIT | VM_ALLOC_NOBUSY |\
 VM_ALLOC_SBUSY | VM_ALLOC_WIRED |  \
-VM_ALLOC_NODUMP | VM_ALLOC_ZERO | VM_ALLOC_COUNT_MASK)
+VM_ALLOC_NODUMP | VM_ALLOC_ZERO |  \
+VM_ALLOC_NOFREE | VM_ALLOC_COUNT_MASK)
KASSERT((req & ~VPA_FLAGS) == 0,
("invalid request %#x", req));
KASSERT(((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
@@ -2154,6 +2155,8 @@ found:
flags |= m->flags & PG_ZERO;
if ((req & VM_ALLOC_NODUMP) != 0)
flags |= PG_NODUMP;
+   if ((req & VM_ALLOC_NOFREE) != 0)
+   flags |= PG_NOFREE;
m->flags = flags;
m->a.flags = 0;
m->oflags = (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0;
@@ -2418,11 +2421,13 @@ vm_page_alloc_noobj_domain(int domain, int req)
 #defineVPAN_FLAGS  (VM_ALLOC_CLASS_MASK | VM_ALLOC_WAITFAIL |  
\
 VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK |\
 VM_ALLOC_NOBUSY | VM_ALLOC_WIRED | \
-VM_ALLOC_NODUMP | VM_ALLOC_ZERO | VM_ALLOC_COUNT_MASK)
+VM_ALLOC_NODUMP | VM_ALLOC_ZERO |  \
+VM_ALLOC_NOFREE | VM_ALLOC_COUNT_MASK)
KASSERT((req & ~VPAN_FLAGS) == 0,
("invalid request %#x", req));
 
-   flags = (req & VM_ALLOC_NODUMP) != 0 ? PG_NODUMP : 0;
+   flags = ((req & VM_ALLOC_NODUMP) != 0 ? PG_NODUMP : 0) |
+   ((req & VM_ALLOC_NOFREE) != 0 ? PG_NOFREE : 0);
vmd = VM_DOMAIN(domain);
 again:
if (vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone != NULL) {
@@ -3937,6 +3942,8 @@ vm_page_free_prep(vm_page_t m)
m, i, (uintmax_t)*p));
}
 #endif
+   KASSERT((m->flags & PG_NOFREE) == 0,
+   ("%s: attempting to free a PG_NOFREE page", __func__));
if ((m->oflags & VPO_UNMANAGED) == 0) {
KASSERT(!pmap_page_is_mapped(m),
("vm_page_free_prep: freeing mapped page %p", m));
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 61a0228273c2..07a6c98c8ee8 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -457,6 +457,7 @@ extern struct mtx_padalign pa_lock[];
 #definePG_ZERO 0x04/* page is zeroed */
 #definePG_MARKER   0x08/* special queue marker page */
 #definePG_NODUMP   0x10/* don't include this page in a 
dump */
+#definePG_NOFREE   0x20/* page should never be freed. 
*/
 
 /*
  * Misc constants.
@@ -537,7 +538,7 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
 #defineVM_ALLOC_WIRED  0x0020  /* (acgnp) Allocate a wired 
page */
 #defineVM_ALLOC_ZERO   0x0040  /* (acgnp) Allocate a zeroed 
page */
 #defineVM_ALLOC_NORECLAIM  0x0080  /* (c) Do not reclaim after 
failure */
-#defineVM_ALLOC_AVAIL0 0x0100
+#defineVM_ALLOC_NOFREE 0x0100  /* (an) Page will never be 
released */
 #defineVM_ALLOC_NOBUSY 0x0200  /* (acgp) Do not excl busy the 
page */
 #defineVM_ALLOC_NOCREAT0x0400  /* (gp) Don't create a page */
 #defineVM_ALLOC_AVAIL1 0x0800
@@ -575,6 +576,8 @@ malloc2vm_flags(int malloc_flags)
pflags |= VM_ALLOC_WAITOK;
if ((malloc_flags & M_NORECLAIM))
pflags |= VM_ALLOC_NORECLAIM;
+   if ((malloc_flags & M_NEVERFREED))
+   pflags |= VM_ALLOC_NOFREE;
return (pflags);
 }
 #endif



git: a8693e89e3e4 - main - vm: Introduce vm_page_alloc_nofree_domain

2024-07-30 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=a8693e89e3e4a04efd02901cc93bb6148e3e40d6

commit a8693e89e3e4a04efd02901cc93bb6148e3e40d6
Author: Bojan Novković 
AuthorDate: 2024-07-14 13:14:22 +
Commit: Bojan Novković 
CommitDate: 2024-07-30 15:38:24 +

vm: Introduce vm_page_alloc_nofree_domain

This patch adds a reservation-aware bump allocator intended for
allocating NOFREE pages. The main goal of this change is to reduce the
long-term fragmentation issues caused by pages that are never freed during 
runtime.

The `vm_page_alloc_nofree_domain` routine hands out 0-order pages from
a preallocated superpage. Once an active NOFREE superpage fills up, the
routine will try to allocate a new one and discard the old one.
This routine will get invoked whenever VM_ALLOC_NOFREE is passed to
vm_page_alloc_noobj or vm_page_alloc.

Differential Revision:  https://reviews.freebsd.org/D45863
Reviewed by:alc, kib, markj
Tested by:  alc
---
 sys/vm/vm_page.c  | 62 +++
 sys/vm/vm_pagequeue.h |  4 
 2 files changed, 66 insertions(+)

diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 3b6b88e4eb32..ff9df7f4a9fc 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -163,6 +163,7 @@ SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | 
CTLFLAG_RD |
 static uma_zone_t fakepg_zone;
 
 static void vm_page_alloc_check(vm_page_t m);
+static vm_page_t vm_page_alloc_nofree_domain(int domain, int req);
 static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m,
 vm_pindex_t pindex, const char *wmesg, int allocflags, bool locked);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
@@ -2099,6 +2100,11 @@ vm_page_alloc_domain_after(vm_object_t object, 
vm_pindex_t pindex, int domain,
if (!vm_pager_can_alloc_page(object, pindex))
return (NULL);
 again:
+   if (__predict_false((req & VM_ALLOC_NOFREE) != 0)) {
+   m = vm_page_alloc_nofree_domain(domain, req);
+   if (m != NULL)
+   goto found;
+   }
 #if VM_NRESERVLEVEL > 0
/*
 * Can we allocate the page from a reservation?
@@ -2430,6 +2436,12 @@ vm_page_alloc_noobj_domain(int domain, int req)
((req & VM_ALLOC_NOFREE) != 0 ? PG_NOFREE : 0);
vmd = VM_DOMAIN(domain);
 again:
+   if (__predict_false((req & VM_ALLOC_NOFREE) != 0)) {
+   m = vm_page_alloc_nofree_domain(domain, req);
+   if (m != NULL)
+   goto found;
+   }
+
if (vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone != NULL) {
m = uma_zalloc(vmd->vmd_pgcache[VM_FREEPOOL_DIRECT].zone,
M_NOWAIT | M_NOVM);
@@ -2480,6 +2492,56 @@ found:
return (m);
 }
 
+#if VM_NRESERVLEVEL > 1
+#defineVM_NOFREE_IMPORT_ORDER  (VM_LEVEL_1_ORDER + VM_LEVEL_0_ORDER)
+#elif VM_NRESERVLEVEL > 0
+#defineVM_NOFREE_IMPORT_ORDER  VM_LEVEL_0_ORDER
+#else
+#defineVM_NOFREE_IMPORT_ORDER  8
+#endif
+
+/*
+ * Allocate a single NOFREE page.
+ *
+ * This routine hands out NOFREE pages from higher-order
+ * physical memory blocks in order to reduce memory fragmentation.
+ * When a NOFREE chunk for a given domain is used up,
+ * the routine will try to fetch a new one from the freelists
+ * and discard the old one.
+ */
+static vm_page_t
+vm_page_alloc_nofree_domain(int domain, int req)
+{
+   vm_page_t m;
+   struct vm_domain *vmd;
+   struct vm_nofreeq *nqp;
+
+   KASSERT((req & VM_ALLOC_NOFREE) != 0, ("invalid request %#x", req));
+
+   vmd = VM_DOMAIN(domain);
+   nqp = &vmd->vmd_nofreeq;
+   vm_domain_free_lock(vmd);
+   if (nqp->offs >= (1 << VM_NOFREE_IMPORT_ORDER) || nqp->ma == NULL) {
+   if (!vm_domain_allocate(vmd, req,
+   1 << VM_NOFREE_IMPORT_ORDER)) {
+   vm_domain_free_unlock(vmd);
+   return (NULL);
+   }
+   nqp->ma = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
+   VM_LEVEL_0_ORDER);
+   if (nqp->ma == NULL) {
+   vm_domain_freecnt_inc(vmd, 1 << VM_NOFREE_IMPORT_ORDER);
+   vm_domain_free_unlock(vmd);
+   return (NULL);
+   }
+   nqp->offs = 0;
+   }
+   m = &nqp->ma[nqp->offs++];
+   vm_domain_free_unlock(vmd);
+
+   return (m);
+}
+
 vm_page_t
 vm_page_alloc_noobj(int req)
 {
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index 7e133ec947b4..86863a0a6400 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -246,6 +246,10 @@ struct vm_domain {
u_int vmd_domain;   /* (c) Domain number. */
u_int vmd_p

git: 3f32a7e4eee5 - main - vm: Add a KVA arena for M_NEVERFREED allocations

2024-07-30 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3f32a7e4eee53d5565a4076e69a41d1afd803e0c

commit 3f32a7e4eee53d5565a4076e69a41d1afd803e0c
Author: Bojan Novković 
AuthorDate: 2024-07-16 14:14:30 +
Commit: Bojan Novković 
CommitDate: 2024-07-30 15:38:24 +

vm: Add a KVA arena for M_NEVERFREED allocations

This patch adds a new KVA arena for separating M_NEVERFREED allocations.
Separating KVAs for pages that are never freed should facilitate
superpage promotion in the kernel.

Differential Revision:  https://reviews.freebsd.org/D45997
Reviewed by:alc, kib, markj
Tested by:  alc
---
 sys/vm/vm_kern.c  | 21 -
 sys/vm/vm_pagequeue.h |  1 +
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index a04044463fe2..fb7c80b767ed 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -473,10 +473,12 @@ kmem_malloc_domain(int domain, vm_size_t size, int flags)
vm_size_t asize;
int rv;
 
-   if (__predict_true((flags & M_EXEC) == 0))
+   if (__predict_true((flags & (M_EXEC | M_NEVERFREED)) == 0))
arena = vm_dom[domain].vmd_kernel_arena;
-   else
+   else if ((flags & M_EXEC) != 0)
arena = vm_dom[domain].vmd_kernel_rwx_arena;
+   else
+   arena = vm_dom[domain].vmd_kernel_nofree_arena;
asize = round_page(size);
if (vmem_alloc(arena, asize, flags | M_BESTFIT, &addr))
return (0);
@@ -882,20 +884,29 @@ kmem_init(vm_offset_t start, vm_offset_t end)
/*
 * In architectures with superpages, maintain separate arenas
 * for allocations with permissions that differ from the
-* "standard" read/write permissions used for kernel memory,
-* so as not to inhibit superpage promotion.
+* "standard" read/write permissions used for kernel memory
+* and pages that are never released, so as not to inhibit
+* superpage promotion.
 *
-* Use the base import quantum since this arena is rarely used.
+* Use the base import quantum since these arenas are rarely
+* used.
 */
 #if VM_NRESERVLEVEL > 0
vm_dom[domain].vmd_kernel_rwx_arena = vmem_create(
"kernel rwx arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+   vm_dom[domain].vmd_kernel_nofree_arena = vmem_create(
+   "kernel NOFREE arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
vmem_set_import(vm_dom[domain].vmd_kernel_rwx_arena,
kva_import_domain, (vmem_release_t *)vmem_xfree,
kernel_arena, KVA_QUANTUM);
+   vmem_set_import(vm_dom[domain].vmd_kernel_nofree_arena,
+   kva_import_domain, (vmem_release_t *)vmem_xfree,
+   kernel_arena, KVA_QUANTUM);
 #else
vm_dom[domain].vmd_kernel_rwx_arena =
vm_dom[domain].vmd_kernel_arena;
+   vm_dom[domain].vmd_kernel_nofree_arena =
+   vm_dom[domain].vmd_kernel_arena;
 #endif
}
 
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index 86863a0a6400..af1183e63e53 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -243,6 +243,7 @@ struct vm_domain {
} vmd_pgcache[VM_NFREEPOOL];
struct vmem *vmd_kernel_arena;  /* (c) per-domain kva R/W arena. */
struct vmem *vmd_kernel_rwx_arena; /* (c) per-domain kva R/W/X arena. */
+   struct vmem *vmd_kernel_nofree_arena; /* (c) per-domain kva NOFREE 
arena. */
u_int vmd_domain;   /* (c) Domain number. */
u_int vmd_page_count;   /* (c) Total page count. */
long vmd_segs;  /* (c) bitmask of the segments */



git: ddc09a10eaa6 - main - pmap_growkernel: Use VM_ALLOC_NOFREE when allocating pagetable pages

2024-07-30 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=ddc09a10eaa66bbebeb691021bb2a9f934d33d58

commit ddc09a10eaa66bbebeb691021bb2a9f934d33d58
Author: Bojan Novković 
AuthorDate: 2024-07-16 15:12:25 +
Commit: Bojan Novković 
CommitDate: 2024-07-30 15:38:24 +

pmap_growkernel: Use VM_ALLOC_NOFREE when allocating pagetable pages

This patch modifies pmap_growkernel in all pmaps to use VM_ALLOC_NOFREE
when allocating new pagetable pages. This should help reduce long-term
fragmentation as these pages are never released after
they are allocated.

Differential Revision:  https://reviews.freebsd.org/D45998
Reviewed by:alc, markj, kib, mhorne
Tested by:  alc
---
 sys/amd64/amd64/pmap.c  | 7 ---
 sys/arm/arm/pmap-v6.c   | 2 +-
 sys/arm64/arm64/pmap.c  | 6 +++---
 sys/i386/i386/pmap.c| 4 ++--
 sys/powerpc/aim/mmu_radix.c | 6 +++---
 sys/riscv/riscv/pmap.c  | 6 +++---
 6 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 778d07689ff0..cf0fc7184f56 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -5156,8 +5156,8 @@ pmap_growkernel(vm_offset_t addr)
pdpe = pmap_pdpe(kernel_pmap, end);
if ((*pdpe & X86_PG_V) == 0) {
nkpg = pmap_alloc_pt_page(kernel_pmap,
-   pmap_pdpe_pindex(end), VM_ALLOC_WIRED |
-   VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO);
+   pmap_pdpe_pindex(end), VM_ALLOC_INTERRUPT |
+   VM_ALLOC_NOFREE | VM_ALLOC_WIRED | 
VM_ALLOC_ZERO);
if (nkpg == NULL)
panic("pmap_growkernel: no memory to grow 
kernel");
paddr = VM_PAGE_TO_PHYS(nkpg);
@@ -5176,7 +5176,8 @@ pmap_growkernel(vm_offset_t addr)
}
 
nkpg = pmap_alloc_pt_page(kernel_pmap, pmap_pde_pindex(end),
-   VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO);
+   VM_ALLOC_INTERRUPT | VM_ALLOC_NOFREE | VM_ALLOC_WIRED |
+   VM_ALLOC_ZERO);
if (nkpg == NULL)
panic("pmap_growkernel: no memory to grow kernel");
paddr = VM_PAGE_TO_PHYS(nkpg);
diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c
index de1082e7ae62..6cc78b187a9a 100644
--- a/sys/arm/arm/pmap-v6.c
+++ b/sys/arm/arm/pmap-v6.c
@@ -2067,7 +2067,7 @@ pmap_growkernel(vm_offset_t addr)
 * Install new PT2s page into kernel PT2TAB.
 */
m = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT |
-   VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+   VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
if (m == NULL)
panic("%s: no memory to grow kernel", __func__);
m->pindex = pte1_index(kernel_vm_end) & ~PT2PG_MASK;
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 59de6ef37f09..58795e25c82e 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -2959,7 +2959,7 @@ pmap_growkernel(vm_offset_t addr)
if (pmap_load(l1) == 0) {
/* We need a new PDP entry */
nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT |
-   VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+   VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
if (nkpg == NULL)
panic("pmap_growkernel: no memory to grow 
kernel");
nkpg->pindex = kernel_vm_end >> L1_SHIFT;
@@ -2978,8 +2978,8 @@ pmap_growkernel(vm_offset_t addr)
continue;
}
 
-   nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED |
-   VM_ALLOC_ZERO);
+   nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT |
+   VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
if (nkpg == NULL)
panic("pmap_growkernel: no memory to grow kernel");
nkpg->pindex = kernel_vm_end >> L2_SHIFT;
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 5808c31a99af..57ba48d399c3 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -2264,8 +2264,8 @@ __CONCAT(PMTYPE, growkernel)(vm_offset_t addr)
continue;
}
 
-   nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED |
-   VM_ALLOC_ZERO);
+   nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT |
+   VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
if (nkpg ==

git: 1206cf04a717 - main - sys: Bump __FreeBSD_version to mark new malloc(9) flags

2024-07-30 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=1206cf04a717a55bc15255ed043b066941cb43f2

commit 1206cf04a717a55bc15255ed043b066941cb43f2
Author: Bojan Novković 
AuthorDate: 2024-07-30 14:41:22 +
Commit: Bojan Novković 
CommitDate: 2024-07-30 15:38:25 +

sys: Bump __FreeBSD_version to mark new malloc(9) flags

This bump is meant to cover the addition of the M_NEVERFREED flag.
---
 sys/sys/param.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/sys/param.h b/sys/sys/param.h
index 7461f7dec403..79d188c93230 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -73,7 +73,7 @@
  * cannot include sys/param.h and should only be updated here.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1500022
+#define __FreeBSD_version 1500023
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,



git: d9ce4c0b6617 - main - vm_page: Fix inconsistent use of VM_NOFREE_IMPORT_ORDER in vm_page_alloc_nofree_domain

2024-07-30 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=d9ce4c0b66170383a558b90ca835d31ee6d87927

commit d9ce4c0b66170383a558b90ca835d31ee6d87927
Author: Bojan Novković 
AuthorDate: 2024-07-30 17:17:18 +
Commit: Bojan Novković 
CommitDate: 2024-07-30 17:25:37 +

vm_page: Fix inconsistent use of VM_NOFREE_IMPORT_ORDER in 
vm_page_alloc_nofree_domain

Pass VM_NOFREE_IMPORT_ORDER to vm_phys_alloc_pages instead of
VM_LEVEL_0_ORDER when allocating a higher-order page for
the NOFREE page allocator.

Reported by:alc
Fixes:  a8693e8
---
 sys/vm/vm_page.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index ff9df7f4a9fc..0f41ea5a6bb5 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2528,7 +2528,7 @@ vm_page_alloc_nofree_domain(int domain, int req)
return (NULL);
}
nqp->ma = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
-   VM_LEVEL_0_ORDER);
+   VM_NOFREE_IMPORT_ORDER);
if (nqp->ma == NULL) {
vm_domain_freecnt_inc(vmd, 1 << VM_NOFREE_IMPORT_ORDER);
vm_domain_free_unlock(vmd);



git: 31cc65708c66 - main - man9: Document M_NEVERFREED and VM_ALLOC_NOFREE

2024-08-05 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=31cc65708c664c2a0257c26503d39ebc506f674e

commit 31cc65708c664c2a0257c26503d39ebc506f674e
Author: Bojan Novković 
AuthorDate: 2024-07-31 17:36:55 +
Commit: Bojan Novković 
CommitDate: 2024-08-05 16:41:22 +

man9: Document M_NEVERFREED and VM_ALLOC_NOFREE

Reviewed by:alc, kib, markj
Differential Revision:  https://reviews.freebsd.org/D46198
---
 share/man/man9/malloc.9| 11 ++-
 share/man/man9/vm_page_alloc.9 |  6 +-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/share/man/man9/malloc.9 b/share/man/man9/malloc.9
index 0ab5b2ed6e8d..d8759a255492 100644
--- a/share/man/man9/malloc.9
+++ b/share/man/man9/malloc.9
@@ -28,7 +28,7 @@
 .\"
 .\" $NetBSD: malloc.9,v 1.3 1996/11/11 00:05:11 lukem Exp $
 .\"
-.Dd October 12, 2022
+.Dd August 4, 2024
 .Dt MALLOC 9
 .Os
 .Sh NAME
@@ -269,6 +269,15 @@ This option should only be used in combination with
 .Dv M_NOWAIT
 when an allocation failure cannot be tolerated by the caller without
 catastrophic effects on the system.
+.It Dv M_NEVERFREED
+This is an internal flag used by the
+.Xr UMA 9
+allocator and should not be used in regular
+.Fn malloc
+invocations.
+See the description of VM_ALLOC_NOFREE in
+.Xr vm_page_alloc 9
+for more details.
 .El
 .Pp
 Exactly one of either
diff --git a/share/man/man9/vm_page_alloc.9 b/share/man/man9/vm_page_alloc.9
index 7d6cf1692bb1..4bf8db33a28d 100644
--- a/share/man/man9/vm_page_alloc.9
+++ b/share/man/man9/vm_page_alloc.9
@@ -28,7 +28,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 .\" DAMAGE.
 .\"
-.Dd July 21, 2024
+.Dd August 4, 2024
 .Dt VM_PAGE_ALLOC 9
 .Os
 .Sh NAME
@@ -307,6 +307,10 @@ pages will be allocated by the caller in the near future.
 must be no larger than 65535.
 If the system is short of free pages, this hint may cause the kernel
 to reclaim memory more aggressively than it would otherwise.
+.It Dv VM_ALLOC_NOFREE
+The caller asserts that the returned page will never be released.
+If this flag is specified, the allocator will try to fetch a page from a
+special per-domain arena in order to curb long-term physical memory 
fragmentation.
 .El
 .Sh RETURN VALUES
 If the allocation was successful, a pointer to the



git: 06134ea2f38c - main - malloc(9): Check for M_NEVERFREED

2024-08-05 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=06134ea2f38ca214b53a1613e110e8332b2804e4

commit 06134ea2f38ca214b53a1613e110e8332b2804e4
Author: Bojan Novković 
AuthorDate: 2024-07-31 17:43:31 +
Commit: Bojan Novković 
CommitDate: 2024-08-05 16:44:10 +

malloc(9): Check for M_NEVERFREED

The recently introduced M_NEVERFREED flag is not meant to be used for
regular malloc requests. Enforce this by checking for M_NEVERFREED
in malloc_dbg.

Reviewed by:alc, kib, markj
Differential Revision:  https://reviews.freebsd.org/D46199
---
 sys/kern/kern_malloc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 3c4cb63003c4..9d7e0464e0f7 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -542,6 +542,8 @@ malloc_dbg(caddr_t *vap, size_t *sizep, struct malloc_type 
*mtp,
once++;
}
}
+   KASSERT((flags & M_NEVERFREED) == 0,
+   ("malloc: M_NEVERFREED is for internal use only"));
 #endif
 #ifdef MALLOC_MAKE_FAILURES
if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) {



git: da76d349b6b1 - main - uma: Deduplicate uma_small_alloc

2024-05-25 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=da76d349b6b104f4e70562304c800a0793dea18d

commit da76d349b6b104f4e70562304c800a0793dea18d
Author: Bojan Novković 
AuthorDate: 2024-05-03 16:48:18 +
Commit: Bojan Novković 
CommitDate: 2024-05-25 17:24:46 +

uma: Deduplicate uma_small_alloc

This commit refactors the UMA small alloc code and
removes most UMA machine-dependent code.
The existing machine-dependent uma_small_alloc code is almost identical
across all architectures, except for powerpc where using the direct
map addresses involved extra steps in some cases.

The MI/MD split was replaced by a default uma_small_alloc
implementation that can be overridden by architecture-specific code by
defining the UMA_MD_SMALL_ALLOC symbol. Furthermore, UMA_USE_DMAP was
introduced to replace most UMA_MD_SMALL_ALLOC uses.

Reviewed by: markj, kib
Approved by: markj (mentor)
Differential Revision:  https://reviews.freebsd.org/D45084
---
 sys/amd64/amd64/uma_machdep.c | 71 ---
 sys/amd64/include/vmparam.h   |  6 ++--
 sys/arm64/arm64/uma_machdep.c | 69 -
 sys/arm64/include/vmparam.h   |  2 +-
 sys/conf/files.amd64  |  1 -
 sys/conf/files.arm64  |  1 -
 sys/conf/files.riscv  |  1 -
 sys/kern/subr_vmem.c  |  6 ++--
 sys/powerpc/include/vmparam.h |  6 ++--
 sys/riscv/include/vmparam.h   |  2 +-
 sys/riscv/riscv/uma_machdep.c | 68 -
 sys/vm/uma_core.c | 43 --
 sys/vm/vm_map.c   |  8 ++---
 sys/vm/vm_radix.c |  2 +-
 14 files changed, 57 insertions(+), 229 deletions(-)

diff --git a/sys/amd64/amd64/uma_machdep.c b/sys/amd64/amd64/uma_machdep.c
deleted file mode 100644
index f83f0674cc4e..
--- a/sys/amd64/amd64/uma_machdep.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2003 Alan L. Cox 
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *notice, this list of conditions and the following disclaimer in the
- *documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-void *
-uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags,
-int wait)
-{
-   vm_page_t m;
-   vm_paddr_t pa;
-   void *va;
-
-   *flags = UMA_SLAB_PRIV;
-   m = vm_page_alloc_noobj_domain(domain, malloc2vm_flags(wait) |
-   VM_ALLOC_WIRED);
-   if (m == NULL)
-   return (NULL);
-   pa = m->phys_addr;
-   if ((wait & M_NODUMP) == 0)
-   dump_add_page(pa);
-   va = (void *)PHYS_TO_DMAP(pa);
-   return (va);
-}
-
-void
-uma_small_free(void *mem, vm_size_t size, u_int8_t flags)
-{
-   vm_page_t m;
-   vm_paddr_t pa;
-
-   pa = DMAP_TO_PHYS((vm_offset_t)mem);
-   dump_drop_page(pa);
-   m = PHYS_TO_VM_PAGE(pa);
-   vm_page_unwire_noq(m);
-   vm_page_free(m);
-}
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index bff9bf840036..e5155a7c7d47 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -72,12 +72,12 @@
 #endif
 
 /*
- * We provide a machine specific single page allocator through the use
- * of the direct mapped segment.  This uses 2MB pages for reduced
+ * We provide a single page allocator through the use of the
+ * direct mapped segment.  This uses 2MB pages for reduced
  * TLB pressure.
  */
 #if !defined(KASAN) && !defined(KMSAN)
-#defineUMA_MD_SMALL_ALLOC
+#define UMA_USE_DMAP
 #endif
 
 /*
dif

git: 0a44b8a56d23 - main - vm: Simplify startup page dumping conditional

2024-05-25 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=0a44b8a56d23e24b05471ddb038b7dd30b149efe

commit 0a44b8a56d23e24b05471ddb038b7dd30b149efe
Author: Bojan Novković 
AuthorDate: 2024-05-03 17:53:56 +
Commit: Bojan Novković 
CommitDate: 2024-05-25 17:24:55 +

vm: Simplify startup page dumping conditional

This commit introduces the MINIDUMP_STARTUP_PAGE_TRACKING symbol and
uses it to simplify several instances of a complex preprocessor conditional
for adding pages allocated when bootstrapping the kernel to minidumps.

Reviewed by:markj, mhorne
Approved by:markj (mentor)
Differential Revision: https://reviews.freebsd.org/D45085
---
 sys/amd64/include/vmparam.h   |  3 ++-
 sys/arm/include/vmparam.h |  1 +
 sys/arm64/include/vmparam.h   |  1 +
 sys/i386/include/vmparam.h|  1 +
 sys/powerpc/include/vmparam.h |  2 ++
 sys/riscv/include/vmparam.h   |  1 +
 sys/vm/uma_core.c |  6 ++
 sys/vm/vm_page.c  | 12 +---
 8 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index e5155a7c7d47..93c2648e8fac 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -294,7 +294,8 @@
 /*
  * Need a page dump array for minidump.
  */
-#define MINIDUMP_PAGE_TRACKING 1
+#define MINIDUMP_PAGE_TRACKING1
+#define MINIDUMP_STARTUP_PAGE_TRACKING 1
 
 #endif /* _MACHINE_VMPARAM_H_ */
 
diff --git a/sys/arm/include/vmparam.h b/sys/arm/include/vmparam.h
index 4ad42cfbe16b..15807923cefb 100644
--- a/sys/arm/include/vmparam.h
+++ b/sys/arm/include/vmparam.h
@@ -200,5 +200,6 @@ extern vm_offset_t vm_max_kernel_address;
  * Need a page dump array for minidump.
  */
 #define MINIDUMP_PAGE_TRACKING 1
+#define MINIDUMP_STARTUP_PAGE_TRACKING 0
 
 #endif /* _MACHINE_VMPARAM_H_ */
diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h
index 0dcd02d63938..0e93e4026d4a 100644
--- a/sys/arm64/include/vmparam.h
+++ b/sys/arm64/include/vmparam.h
@@ -317,6 +317,7 @@ extern vm_offset_t dmap_max_addr;
  * Need a page dump array for minidump.
  */
 #define MINIDUMP_PAGE_TRACKING 1
+#define MINIDUMP_STARTUP_PAGE_TRACKING 1
 
 #endif /* !_MACHINE_VMPARAM_H_ */
 
diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h
index 0d9734ae3830..6f8885a539c3 100644
--- a/sys/i386/include/vmparam.h
+++ b/sys/i386/include/vmparam.h
@@ -246,5 +246,6 @@
  * Need a page dump array for minidump.
  */
 #define MINIDUMP_PAGE_TRACKING 1
+#define MINIDUMP_STARTUP_PAGE_TRACKING 0
 
 #endif /* _MACHINE_VMPARAM_H_ */
diff --git a/sys/powerpc/include/vmparam.h b/sys/powerpc/include/vmparam.h
index 250da8298610..a612acbef239 100644
--- a/sys/powerpc/include/vmparam.h
+++ b/sys/powerpc/include/vmparam.h
@@ -308,11 +308,13 @@ externint vm_level_0_order;
  * Need a page dump array for minidump.
  */
 #define MINIDUMP_PAGE_TRACKING 1
+#define MINIDUMP_STARTUP_PAGE_TRACKING 1
 #else
 /*
  * No minidump with 32-bit powerpc.
  */
 #define MINIDUMP_PAGE_TRACKING 0
+#define MINIDUMP_STARTUP_PAGE_TRACKING 0
 #endif
 
 #definePMAP_HAS_DMAP   (hw_direct_map)
diff --git a/sys/riscv/include/vmparam.h b/sys/riscv/include/vmparam.h
index 5711bc8c347e..5092977d0669 100644
--- a/sys/riscv/include/vmparam.h
+++ b/sys/riscv/include/vmparam.h
@@ -257,5 +257,6 @@ extern vm_offset_t init_pt_va;
  * Need a page dump array for minidump.
  */
 #define MINIDUMP_PAGE_TRACKING 1
+#define MINIDUMP_STARTUP_PAGE_TRACKING 1
 
 #endif /* !_MACHINE_VMPARAM_H_ */
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index f9b6e18899c6..59066eb96ae9 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -1890,8 +1890,7 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, int 
domain, uint8_t *pflag,
 
pa = VM_PAGE_TO_PHYS(m);
for (i = 0; i < pages; i++, pa += PAGE_SIZE) {
-#if defined(__aarch64__) || defined(__amd64__) || \
-defined(__riscv) || defined(__powerpc64__)
+#if MINIDUMP_PAGE_TRACKING && MINIDUMP_STARTUP_PAGE_TRACKING
if ((wait & M_NODUMP) == 0)
dump_add_page(pa);
 #endif
@@ -1918,8 +1917,7 @@ startup_free(void *mem, vm_size_t bytes)
if (va >= bootstart && va + bytes <= bootmem)
pmap_remove(kernel_pmap, va, va + bytes);
for (; bytes != 0; bytes -= PAGE_SIZE, m++) {
-#if defined(__aarch64__) || defined(__amd64__) || \
-defined(__riscv) || defined(__powerpc64__)
+#if MINIDUMP_PAGE_TRACKING && MINIDUMP_STARTUP_PAGE_TRACKING
dump_drop_page(VM_PAGE_TO_PHYS(m));
 #endif
vm_page_unwire_noq(m);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 4429ec067673..9ba31cb9e1b3 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -620,11 +620,7 @@ vm_page_startup(vm_offset_t vaddr)
vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,

git: d25ed6504383 - main - uma: Fix improper uses of UMA_MD_SMALL_ALLOC

2024-05-26 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=d25ed6504383e15b7eb6d04876b70548fffc9690

commit d25ed6504383e15b7eb6d04876b70548fffc9690
Author: Bojan Novković 
AuthorDate: 2024-05-26 07:54:45 +
Commit: Bojan Novković 
CommitDate: 2024-05-26 05:27:37 +

uma: Fix improper uses of UMA_MD_SMALL_ALLOC

UMA_MD_SMALL_ALLOC was recently replaced by UMA_USE_DMAP, but
da76d349b6b1 missed some improper uses of the old symbol.
This change makes sure that UMA_USE_DMAP is used properly in
code that selects uma_small_alloc.

Fixes: da76d349b6b1
Reported by: eduardo, rlibby
Approved by: markj (mentor)
Differential Revision:  https://reviews.freebsd.org/D45368
---
 sys/vm/uma_core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index 59066eb96ae9..516ac2c2965a 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -2523,7 +2523,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 * If we haven't booted yet we need allocations to go through the
 * startup cache until the vm is ready.
 */
-#ifdef UMA_MD_SMALL_ALLOC
+#ifdef UMA_USE_DMAP
if (keg->uk_ppera == 1)
keg->uk_allocf = uma_small_alloc;
else
@@ -2536,7 +2536,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
keg->uk_allocf = contig_alloc;
else
keg->uk_allocf = page_alloc;
-#ifdef UMA_MD_SMALL_ALLOC
+#ifdef UMA_USE_DMAP
if (keg->uk_ppera == 1)
keg->uk_freef = uma_small_free;
else
@@ -5221,7 +5221,7 @@ uma_zone_reserve_kva(uma_zone_t zone, int count)
keg->uk_kva = kva;
keg->uk_offset = 0;
zone->uz_max_items = pages * keg->uk_ipers;
-#ifdef UMA_MD_SMALL_ALLOC
+#ifdef UMA_USE_DMAP
keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
 #else
keg->uk_allocf = noobj_alloc;



git: 4c053c17f2c8 - main - zfs: Update use of UMA-related symbols in arc_available_memory

2024-05-27 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=4c053c17f2c8a715988f215d16284879857ca376

commit 4c053c17f2c8a715988f215d16284879857ca376
Author: Bojan Novković 
AuthorDate: 2024-05-27 13:28:03 +
Commit: Bojan Novković 
CommitDate: 2024-05-27 13:47:17 +

zfs: Update use of UMA-related symbols in arc_available_memory

da76d34 repurposed the use of UMA_MD_SMALL_ALLOC in a way that breaks
arc_available_memory on -CURRENT. This change ensures that
arc_available_memory uses the new symbol while maintaining compatibility
with older FreeBSD releases. This change was submitted to upstream
as well.

Approved by:markj (mentor)
Fixes:  da76d34
---
 sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c 
b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
index 92696c0bf1ae..478b74828c65 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
@@ -89,7 +89,7 @@ arc_available_memory(void)
if (n < lowest) {
lowest = n;
}
-#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
+#if !defined(UMA_MD_SMALL_ALLOC) && !defined(UMA_USE_DMAP)
/*
 * If we're on an i386 platform, it's possible that we'll exhaust the
 * kernel heap space before we ever run out of available physical



git: b53b21e8f81a - main - amd64 pmap: Release PTP reference on leaf ptpage allocation failure

2024-06-16 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=b53b21e8f81a8d2d233b99cee6426c2f64765a3c

commit b53b21e8f81a8d2d233b99cee6426c2f64765a3c
Author: Bojan Novković 
AuthorDate: 2024-06-13 15:58:49 +
Commit: Bojan Novković 
CommitDate: 2024-06-16 16:19:26 +

amd64 pmap: Release PTP reference on leaf ptpage allocation failure

aa3bcaa fixed an edge case involving mlock() and superpage creation
by creating and inserting a leaf pagetable page for mlock'd superpages.
However, the code does not properly release the reference to the
pagetable page in the error handling path.
This commit fixes the issue by adding calls to 'pmap_abort_ptp'
in the error handling path.

Reported by: alc
Approved by: markj (mentor)
Fixes: aa3bcaa
Differential Revision: https://reviews.freebsd.org/D45577
---
 sys/amd64/amd64/pmap.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 4d4ecc8ea4e2..dee208fc9145 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -7595,10 +7595,13 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t 
newpde, u_int flags,
if ((newpde & PG_W) != 0 && pmap != kernel_pmap) {
uwptpg = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va),
VM_ALLOC_WIRED);
-   if (uwptpg == NULL)
+   if (uwptpg == NULL) {
+   pmap_abort_ptp(pmap, va, pdpg);
return (KERN_RESOURCE_SHORTAGE);
+   }
if (pmap_insert_pt_page(pmap, uwptpg, true, false)) {
pmap_free_pt_page(pmap, uwptpg, false);
+   pmap_abort_ptp(pmap, va, pdpg);
return (KERN_RESOURCE_SHORTAGE);
}
 



git: 5d4545a2270e - main - arm64 pmap: Release PTP reference on leaf ptpage allocation failure

2024-06-16 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=5d4545a2270e9d6c37c0a580c010c579ccdfa129

commit 5d4545a2270e9d6c37c0a580c010c579ccdfa129
Author: Bojan Novković 
AuthorDate: 2024-06-13 15:59:29 +
Commit: Bojan Novković 
CommitDate: 2024-06-16 16:19:26 +

arm64 pmap: Release PTP reference on leaf ptpage allocation failure

808f5ac fixed an edge case involving mlock() and superpage creation
by creating and inserting a leaf pagetable page for mlock'd superpages.
However, the code does not properly release the reference to the
pagetable page in the error handling path.
This commit fixes the issue by adding calls to 'pmap_abort_ptp'
in the error handling path.

Reported by: alc
Approved by: markj (mentor)
Fixes: 808f5ac
Differential Revision:  https://reviews.freebsd.org/D45578
---
 sys/arm64/arm64/pmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 7b30b2a6ae37..e8991ae706f9 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -5545,12 +5545,14 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
new_l2, u_int flags,
if ((new_l2 & ATTR_SW_WIRED) != 0 && pmap != kernel_pmap) {
uwptpg = vm_page_alloc_noobj(VM_ALLOC_WIRED);
if (uwptpg == NULL) {
+   pmap_abort_ptp(pmap, va, l2pg);
return (KERN_RESOURCE_SHORTAGE);
}
uwptpg->pindex = pmap_l2_pindex(va);
if (pmap_insert_pt_page(pmap, uwptpg, true, false)) {
vm_page_unwire_noq(uwptpg);
vm_page_free(uwptpg);
+   pmap_abort_ptp(pmap, va, l2pg);
return (KERN_RESOURCE_SHORTAGE);
}
pmap_resident_count_inc(pmap, 1);



git: 200de4dc0716 - main - powerpc_mmu_radix: Introduce 'pmap_abort_ptp'

2024-06-16 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=200de4dc0716ae2cc2b0995ccb6eee555d14c751

commit 200de4dc0716ae2cc2b0995ccb6eee555d14c751
Author: Bojan Novković 
AuthorDate: 2024-06-13 16:03:31 +
Commit: Bojan Novković 
CommitDate: 2024-06-16 16:19:26 +

powerpc_mmu_radix: Introduce 'pmap_abort_ptp'

This commit moves code for releasing pagetable page references
into a separate function. No functional change intended.

Approved by: markj (mentor)
Differential Revision:  https://reviews.freebsd.org/D45581
---
 sys/powerpc/aim/mmu_radix.c | 34 +++---
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/sys/powerpc/aim/mmu_radix.c b/sys/powerpc/aim/mmu_radix.c
index 0a534e3f9d3f..746b1ef49a99 100644
--- a/sys/powerpc/aim/mmu_radix.c
+++ b/sys/powerpc/aim/mmu_radix.c
@@ -3137,6 +3137,28 @@ out:
return (rv);
 }
 
+/*
+ * Release a page table page reference after a failed attempt to create a
+ * mapping.
+ */
+static void
+pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t pdpg)
+{
+   struct spglist free;
+
+   SLIST_INIT(&free);
+   if (pmap_unwire_ptp(pmap, va, pdpg, &free)) {
+   /*
+* Although "va" is not mapped, paging-
+* structure caches could nonetheless have
+* entries that refer to the freed page table
+* pages.  Invalidate those entries.
+*/
+   pmap_invalidate_page(pmap, va);
+   vm_page_free_pages_toq(&free, true);
+   }
+}
+
 /*
  * Tries to create a read- and/or execute-only 2MB page mapping.  Returns true
  * if successful.  Returns false if (1) a page table page cannot be allocated
@@ -3264,17 +3286,7 @@ pmap_enter_l3e(pmap_t pmap, vm_offset_t va, pml3_entry_t 
newpde, u_int flags,
 * Abort this mapping if its PV entry could not be created.
 */
if (!pmap_pv_insert_l3e(pmap, va, newpde, flags, lockp)) {
-   SLIST_INIT(&free);
-   if (pmap_unwire_ptp(pmap, va, pdpg, &free)) {
-   /*
-* Although "va" is not mapped, paging-
-* structure caches could nonetheless have
-* entries that refer to the freed page table
-* pages.  Invalidate those entries.
-*/
-   pmap_invalidate_page(pmap, va);
-   vm_page_free_pages_toq(&free, true);
-   }
+   pmap_abort_ptp(pmap, va, pdpg);
if (uwptpg != NULL) {
mt = pmap_remove_pt_page(pmap, va);
KASSERT(mt == uwptpg,



git: 858ead4bcefb - main - powerpc_mmu_radix: Release PTP reference on leaf ptpage allocation failure

2024-06-16 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=858ead4bcefb4657629cba29b0e4507db509ee36

commit 858ead4bcefb4657629cba29b0e4507db509ee36
Author: Bojan Novković 
AuthorDate: 2024-06-13 16:11:12 +
Commit: Bojan Novković 
CommitDate: 2024-06-16 16:19:27 +

powerpc_mmu_radix: Release PTP reference on leaf ptpage allocation failure

0013741 fixed an edge case involving mlock() and superpage creation
by creating and inserting a leaf pagetable page for mlock'd superpages.
However, the code does not properly release the reference to the
pagetable page in the error handling path.
This commit fixes the issue by adding calls to 'pmap_abort_ptp'
in the error handling path.

Reported by: alc
Approved by: markj (mentor)
Fixes: 0013741
Differential Revision: https://reviews.freebsd.org/D45582
---
 sys/powerpc/aim/mmu_radix.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sys/powerpc/aim/mmu_radix.c b/sys/powerpc/aim/mmu_radix.c
index 746b1ef49a99..ae6e4d116e87 100644
--- a/sys/powerpc/aim/mmu_radix.c
+++ b/sys/powerpc/aim/mmu_radix.c
@@ -3268,12 +3268,15 @@ pmap_enter_l3e(pmap_t pmap, vm_offset_t va, 
pml3_entry_t newpde, u_int flags,
uwptpg = NULL;
if ((newpde & PG_W) != 0 && pmap != kernel_pmap) {
uwptpg = vm_page_alloc_noobj(VM_ALLOC_WIRED);
-   if (uwptpg == NULL)
+   if (uwptpg == NULL) {
+   pmap_abort_ptp(pmap, va, pdpg);
return (KERN_RESOURCE_SHORTAGE);
+   }
uwptpg->pindex = pmap_l3e_pindex(va);
if (pmap_insert_pt_page(pmap, uwptpg)) {
vm_page_unwire_noq(uwptpg);
vm_page_free(uwptpg);
+   pmap_abort_ptp(pmap, va, pdpg);
return (KERN_RESOURCE_SHORTAGE);
}
pmap_resident_count_inc(pmap, 1);



git: e8816b4b66ad - main - riscv pmap: Introduce 'pmap_abort_ptp'

2024-06-16 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e8816b4b66adf2e6052803cd0eb609ee63fbb3ed

commit e8816b4b66adf2e6052803cd0eb609ee63fbb3ed
Author: Bojan Novković 
AuthorDate: 2024-06-13 16:13:53 +
Commit: Bojan Novković 
CommitDate: 2024-06-16 16:19:27 +

riscv pmap: Introduce 'pmap_abort_ptp'

This commit moves code for releasing pagetable page references
into a separate function. No functional change intended.

Approved by: markj (mentor)
Differential Revision: https://reviews.freebsd.org/D45579
---
 sys/riscv/riscv/pmap.c | 34 +++---
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 1902f1f4009b..4f6305ed651d 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -3137,6 +3137,28 @@ out:
return (rv);
 }
 
+/*
+ * Release a page table page reference after a failed attempt to create a
+ * mapping.
+ */
+static void
+pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t l2pg)
+{
+   struct spglist free;
+
+   SLIST_INIT(&free);
+   if (pmap_unwire_ptp(pmap, va, l2pg, &free)) {
+   /*
+* Although "va" is not mapped, paging-structure
+* caches could nonetheless have entries that
+* refer to the freed page table pages.
+* Invalidate those entries.
+*/
+   pmap_invalidate_page(pmap, va);
+   vm_page_free_pages_toq(&free, true);
+   }
+}
+
 /*
  * Tries to create a read- and/or execute-only 2MB page mapping.  Returns
  * KERN_SUCCESS if the mapping was created.  Otherwise, returns an error
@@ -3285,17 +3307,7 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
new_l2, u_int flags,
 * Abort this mapping if its PV entry could not be created.
 */
if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) {
-   SLIST_INIT(&free);
-   if (pmap_unwire_ptp(pmap, va, l2pg, &free)) {
-   /*
-* Although "va" is not mapped, paging-structure
-* caches could nonetheless have entries that
-* refer to the freed page table pages.
-* Invalidate those entries.
-*/
-   pmap_invalidate_page(pmap, va);
-   vm_page_free_pages_toq(&free, true);
-   }
+   pmap_abort_ptp(pmap, va, l2pg);
if (uwptpg != NULL) {
mt = pmap_remove_pt_page(pmap, va);
KASSERT(mt == uwptpg,



git: 774549fe06ac - main - riscv pmap: Release PTP reference on leaf ptpage allocation failure

2024-06-16 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=774549fe06ac0f45a5a5a661a7fb4107a8695d4c

commit 774549fe06ac0f45a5a5a661a7fb4107a8695d4c
Author: Bojan Novković 
AuthorDate: 2024-06-13 16:14:21 +
Commit: Bojan Novković 
CommitDate: 2024-06-16 16:19:27 +

riscv pmap: Release PTP reference on leaf ptpage allocation failure

d0941ed fixed an edge case involving mlock() and superpage creation
by creating and inserting a leaf pagetable page for mlock'd superpages.
However, the code does not properly release the reference to the
pagetable page in the error handling path.
This commit fixes the issue by adding calls to 'pmap_abort_ptp'
in the error handling path.

Reported by: alc
Approved by: markj (mentor)
Fixes: d0941ed
Differential Revision: https://reviews.freebsd.org/D45580
---
 sys/riscv/riscv/pmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 4f6305ed651d..1e4061935ca0 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -3291,12 +3291,14 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t 
new_l2, u_int flags,
if ((new_l2 & PTE_SW_WIRED) != 0 && pmap != kernel_pmap) {
uwptpg = vm_page_alloc_noobj(VM_ALLOC_WIRED);
if (uwptpg == NULL) {
+   pmap_abort_ptp(pmap, va, l2pg);
return (KERN_RESOURCE_SHORTAGE);
}
uwptpg->pindex = pmap_l2_pindex(va);
if (pmap_insert_pt_page(pmap, uwptpg, true, false)) {
vm_page_unwire_noq(uwptpg);
vm_page_free(uwptpg);
+   pmap_abort_ptp(pmap, va, l2pg);
return (KERN_RESOURCE_SHORTAGE);
}
pmap_resident_count_inc(pmap, 1);



git: 7a79d0669761 - main - vm: improve kstack_object pindex calculation to avoid pindex holes

2024-04-10 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7a79d066976149349ecb90240d02eed0c4268737

commit 7a79d066976149349ecb90240d02eed0c4268737
Author: Bojan Novković 
AuthorDate: 2024-04-09 19:02:12 +
Commit: Bojan Novković 
CommitDate: 2024-04-10 15:37:20 +

vm: improve kstack_object pindex calculation to avoid pindex holes

This commit replaces the linear transformation of kernel virtual
addresses to kstack_object pindex values with a non-linear
scheme that circumvents physical memory fragmentation caused by
kernel stack guard pages. The new mapping scheme is used to
effectively "skip" guard pages and assign pindices for
non-guard pages in a contiguous fashion.

The new allocation scheme requires that all default-sized kstack KVAs
come from a separate, specially aligned region of the KVA space.
For this to work, this commit introduces a dedicated per-domain
kstack KVA arena used to allocate kernel stacks of default size.
The behaviour on 32-bit platforms remains unchanged due to a
significantly smaller KVA space.

Aside from fulfilling the requirements imposed by the new scheme, a
separate kstack KVA arena facilitates superpage promotion in the rest
of kernel and causes most kstacks to have guard pages at both ends.

Reviewed by:  alc, kib, markj
Tested by:markj
Approved by:  markj (mentor)
Differential Revision: https://reviews.freebsd.org/D38852
---
 sys/sys/proc.h  |   3 +-
 sys/vm/vm_extern.h  |   6 +-
 sys/vm/vm_glue.c| 336 +---
 sys/vm/vm_kern.h|   2 -
 sys/vm/vm_swapout.c |  29 +++--
 5 files changed, 314 insertions(+), 62 deletions(-)

diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index b08226c89dfd..fa4c7d2768f0 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -366,7 +366,8 @@ struct thread {
struct callout  td_slpcallout;  /* (h) Callout for sleep. */
struct trapframe *td_frame; /* (k) */
vm_offset_t td_kstack;  /* (a) Kernel VA of kstack. */
-   int td_kstack_pages; /* (a) Size of the kstack. */
+   u_short td_kstack_pages;/* (a) Size of the kstack. */
+   u_short td_kstack_domain;   /* (a) Domain backing kstack 
KVA. */
volatile u_int  td_critnest;/* (k*) Critical section nest level. */
struct mdthread td_md;  /* (k) Any machine-dependent fields. */
struct kaudit_record*td_ar; /* (k) Active audit record, if any. */
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index 2e2bc18a0233..b50abab7380c 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -127,8 +127,10 @@ struct sf_buf *vm_imgact_map_page(vm_object_t object, 
vm_ooffset_t offset);
 void vm_imgact_unmap_page(struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
-void vm_thread_stack_back(struct domainset *ds, vm_offset_t kaddr,
-vm_page_t ma[], int npages, int req_class);
+vm_pindex_t vm_kstack_pindex(vm_offset_t ks, int npages);
+vm_object_t vm_thread_kstack_size_to_obj(int npages);
+int vm_thread_stack_back(vm_offset_t kaddr, vm_page_t ma[], int npages,
+int req_class, int domain);
 u_int vm_active_count(void);
 u_int vm_inactive_count(void);
 u_int vm_laundry_count(void);
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 100d31e7c4ec..4292a7533503 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -96,14 +96,23 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
+#if VM_NRESERVLEVEL > 0
+#define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
+#else
+#define KVA_KSTACK_QUANTUM_SHIFT (8 + PAGE_SHIFT)
+#endif
+#define KVA_KSTACK_QUANTUM (1ul << KVA_KSTACK_QUANTUM_SHIFT)
+
 /*
  * MPSAFE
  *
@@ -262,9 +271,11 @@ vm_sync_icache(vm_map_t map, vm_offset_t va, vm_offset_t 
sz)
pmap_sync_icache(map->pmap, va, sz);
 }
 
-vm_object_t kstack_object;
+static vm_object_t kstack_object;
+static vm_object_t kstack_alt_object;
 static uma_zone_t kstack_cache;
 static int kstack_cache_size;
+static vmem_t *vmd_kstack_arena[MAXMEMDOM];
 
 static int
 sysctl_kstack_cache_size(SYSCTL_HANDLER_ARGS)
@@ -282,63 +293,218 @@ SYSCTL_PROC(_vm, OID_AUTO, kstack_cache_size,
 sysctl_kstack_cache_size, "IU", "Maximum number of cached kernel stacks");
 
 /*
- * Create the kernel stack (including pcb for i386) for a new thread.
+ * Allocate a virtual address range from a domain kstack arena, following
+ * the specified NUMA policy.
  */
 static vm_offset_t
-vm_thread_stack_create(struct domainset *ds, int pages)
+vm_thread_alloc_kstack_kva(vm_size_t size, int domain)
 {
-   vm_page_t ma[KSTACK_MAX_PAGES];
-   vm_offset_t ks;
-   int i;
+#ifndef __ILP32__
+   int rv;
+   v

git: 849599e28a87 - main - committers-src: Add bnovkov@ with markj@ and jhb@ as mentors

2024-02-19 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=849599e28a8708b137be2c8ed2c7ca114d0caf1e

commit 849599e28a8708b137be2c8ed2c7ca114d0caf1e
Author: Bojan Novković 
AuthorDate: 2024-02-19 15:55:31 +
Commit: Bojan Novković 
CommitDate: 2024-02-19 15:55:31 +

committers-src: Add bnovkov@ with markj@ and jhb@ as mentors

Add mentorship information for bnovkov@.

Approved by:  markj (mentor)
Differential Revision: https://reviews.freebsd.org/D43963
---
 share/misc/committers-src.dot | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/share/misc/committers-src.dot b/share/misc/committers-src.dot
index 6201c0c69894..d9fd06cd6597 100644
--- a/share/misc/committers-src.dot
+++ b/share/misc/committers-src.dot
@@ -131,6 +131,7 @@ bdrewery [label="Bryan 
Drewery\nbdrew...@freebsd.org\n2013/12/14"]
 benl [label="Ben Laurie\nb...@freebsd.org\n2011/05/18"]
 benno [label="Benno Rice\nbe...@freebsd.org\n2000/11/02"]
 bms [label="Bruce M Simpson\n...@freebsd.org\n2003/08/06"]
+bnovkov [label="Bojan Novkovic\nbnov...@freebsd.org\n2024/02/13"]
 br [label="Ruslan Bukin\n...@freebsd.org\n2013/09/02"]
 brian [label="Brian Somers\nbr...@freebsd.org\n1996/12/16"]
 brooks [label="Brooks Davis\nbro...@freebsd.org\n2001/06/21"]
@@ -636,6 +637,7 @@ jfv -> erj
 
 jhb -> arr
 jhb -> avg
+jhb -> bnovkov
 jhb -> jch
 jhb -> jeff
 jhb -> kbyanc
@@ -750,6 +752,7 @@ marcel -> marius
 marcel -> nwhitehorn
 marcel -> sjg
 
+markj -> bnovkov
 markj -> cem
 markj -> christos
 markj -> dougm



git: c21bc6f3c242 - main - ddb: Add CTF-based pretty printing

2024-03-21 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=c21bc6f3c2425de74141bfee07b609bf65b5a6b3

commit c21bc6f3c2425de74141bfee07b609bf65b5a6b3
Author: Bojan Novković 
AuthorDate: 2024-03-22 03:01:34 +
Commit: Bojan Novković 
CommitDate: 2024-03-22 03:03:33 +

ddb: Add CTF-based pretty printing

Add basic CTF support and a CTF-powered pretty-printer to ddb.

The db_ctf.* files expose a basic interface for fetching type
data for ELF symbols, interacting with the CTF string table,
and translating type identifiers to type data.

The db_pprint.c file uses those interfaces to implement
a pretty-printer for all kernel ELF symbols.
The pretty-printer works with symbol names and arbitrary addresses:
pprint struct thread 0x8194ad90

Pretty-printing currently only works after the root filesystem
gets mounted because the CTF info is not available during
early boot.

Differential Revision:  https://reviews.freebsd.org/D37899
Approved by: markj (mentor)
---
 share/man/man4/ddb.4|  26 +++
 sys/conf/files  |   2 +
 sys/ddb/db_command.c|   1 +
 sys/ddb/db_ctf.c| 326 +++
 sys/ddb/db_ctf.h|  64 +++
 sys/ddb/db_pprint.c | 450 
 sys/ddb/ddb.h   |   1 +
 sys/kern/kern_ctf.c |  40 +
 sys/kern/kern_linker.c  |  68 +++-
 sys/kern/link_elf.c |  37 
 sys/kern/link_elf_obj.c |  14 ++
 sys/kern/linker_if.m|  23 +++
 sys/sys/linker.h|   3 +
 13 files changed, 1054 insertions(+), 1 deletion(-)

diff --git a/share/man/man4/ddb.4 b/share/man/man4/ddb.4
index 3648c9ca58cb..f3443cbac127 100644
--- a/share/man/man4/ddb.4
+++ b/share/man/man4/ddb.4
@@ -289,6 +289,32 @@ eax = xx
 ecx = yy
 .Ed
 .Pp
+.It Ic pprint Ns Oo Li / Ns Cm d depth Oc Oo Ar name Oc
+Pretty-print symbol specified by
+.Ar name
+using CTF debugging data. Works for all symbols exported by the kernel and 
loaded kernel modules.
+.Pp
+If the
+.Cm d
+modifier has been specified, contents of structs nested up to
+.Ar depth
+levels deep will also be included in the output.
+.Ed
+.Pp
+.It Ic pprint struct Ns Oo Li / Ns Cm d depth Ic Oc Oo Ar name Oc Ns Op Ns Ar 
addr
+Print memory at
+.Ar addr
+as struct
+.Ar name Ns .
+Works for all structs defined by the kernel and loaded kernel modules.
+.Pp
+If the
+.Cm d
+modifier has been specified, contents of structs nested up to
+.Ar depth
+levels deep will also be included in the output.
+.Ed
+.Pp
 .It Xo
 .Ic write Ns Op Li / Ns Cm bhl
 .Ar addr expr1 Op Ar expr2 ...
diff --git a/sys/conf/files b/sys/conf/files
index c902bcfdbd52..021829408c0f 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -718,12 +718,14 @@ ddb/db_access.c   optional ddb
 ddb/db_break.c optional ddb
 ddb/db_capture.c   optional ddb
 ddb/db_command.c   optional ddb
+ddb/db_ctf.c   optional ddb
 ddb/db_examine.c   optional ddb
 ddb/db_expr.c  optional ddb
 ddb/db_input.c optional ddb
 ddb/db_lex.c   optional ddb
 ddb/db_main.c  optional ddb
 ddb/db_output.coptional ddb
+ddb/db_pprint.coptional ddb
 ddb/db_print.c optional ddb
 ddb/db_ps.coptional ddb
 ddb/db_run.c   optional ddb
diff --git a/sys/ddb/db_command.c b/sys/ddb/db_command.c
index 9d79e3b2a6d3..0c88d496f6b8 100644
--- a/sys/ddb/db_command.c
+++ b/sys/ddb/db_command.c
@@ -163,6 +163,7 @@ static struct db_command db_cmds[] = {
DB_CMD("capture",   db_capture_cmd, CS_OWN|DB_CMD_MEMSAFE),
DB_CMD("textdump",  db_textdump_cmd,CS_OWN|DB_CMD_MEMSAFE),
DB_CMD("findstack", db_findstack_cmd,   0),
+   DB_CMD("pprint",db_pprint_cmd,  CS_OWN),
 };
 struct db_command_table db_cmd_table = LIST_HEAD_INITIALIZER(db_cmd_table);
 
diff --git a/sys/ddb/db_ctf.c b/sys/ddb/db_ctf.c
new file mode 100644
index ..03145064885c
--- /dev/null
+++ b/sys/ddb/db_ctf.c
@@ -0,0 +1,326 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2023 Bojan Novković 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS 

git: 637e67e03290 - main - ddb: Drop obsolete -FreeBSD identifier from license

2024-03-28 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=637e67e0329058c86353dbe523740e34d8fefd11

commit 637e67e0329058c86353dbe523740e34d8fefd11
Author: Bojan Novković 
AuthorDate: 2024-03-28 19:32:52 +
Commit: Bojan Novković 
CommitDate: 2024-03-28 19:32:52 +

ddb: Drop obsolete -FreeBSD identifier from license

Reported by:jrtc27
Fixes:  c21bc6f3c242 ("ddb: Add CTF-based pretty printing")
Approved by:markj (mentor)
---
 sys/ddb/db_ctf.c| 2 +-
 sys/ddb/db_ctf.h| 2 +-
 sys/ddb/db_pprint.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sys/ddb/db_ctf.c b/sys/ddb/db_ctf.c
index 03145064885c..56a6086849e9 100644
--- a/sys/ddb/db_ctf.c
+++ b/sys/ddb/db_ctf.c
@@ -1,5 +1,5 @@
 /*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2023 Bojan Novković 
  *
diff --git a/sys/ddb/db_ctf.h b/sys/ddb/db_ctf.h
index 6da5f76b6cf6..c4c977cb8205 100644
--- a/sys/ddb/db_ctf.h
+++ b/sys/ddb/db_ctf.h
@@ -1,5 +1,5 @@
 /*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2023 Bojan Novković 
  *
diff --git a/sys/ddb/db_pprint.c b/sys/ddb/db_pprint.c
index aae3d698e8ec..8aa14550f068 100644
--- a/sys/ddb/db_pprint.c
+++ b/sys/ddb/db_pprint.c
@@ -1,5 +1,5 @@
 /*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2022 Bojan Novković 
  *



git: 722b8e3cb62b - main - Fix style nits in kern_linker.c

2024-03-28 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=722b8e3cb62bd3e43035527e08fe058d5046901d

commit 722b8e3cb62bd3e43035527e08fe058d5046901d
Author: Bojan Novković 
AuthorDate: 2024-03-28 19:36:30 +
Commit: Bojan Novković 
CommitDate: 2024-03-28 19:36:30 +

Fix style nits in kern_linker.c

Reported by:jrtc27
Fixes:  c21bc6f3c242 ("ddb: Add CTF-based pretty printing")
Approved by:markj (mentor)
---
 sys/kern/link_elf.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index dddead849dc9..b08c19f3c018 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -147,7 +147,6 @@ static int  link_elf_lookup_debug_symbol(linker_file_t, 
const char *,
c_linker_sym_t *);
 static int link_elf_lookup_debug_symbol_ctf(linker_file_t lf,
const char *name, c_linker_sym_t *sym, linker_ctf_t *lc);
-
 static int link_elf_symbol_values(linker_file_t, c_linker_sym_t,
linker_symval_t *);
 static int link_elf_debug_symbol_values(linker_file_t, c_linker_sym_t,



git: bdc903460be4 - main - kern_ctf.c: Don't print out warning messages unconditionally

2024-03-29 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=bdc903460be4b6a729c1b7cde55963730c68cec4

commit bdc903460be4b6a729c1b7cde55963730c68cec4
Author: Bojan Novković 
AuthorDate: 2024-03-29 19:17:19 +
Commit: Bojan Novković 
CommitDate: 2024-03-29 19:32:18 +

kern_ctf.c: Don't print out warning messages unconditionally

The kernel CTF loading routines print various warnings when attempting
to load CTF data from an ELF file. After the changes in c21bc6f3c242
those warnings are unnecessarily printed for each kernel module
that was compiled without CTF data.

The kernel linker already uses the bootverbose flag to conditionally
print CTF loading errors. This patch alters kern_ctf.c
routines to do the same.

Reported by:alexan...@leidinger.net
Approved by:markj (mentor)
Fixes: c21bc6f3c242 ("ddb: Add CTF-based pretty printing")
---
 sys/kern/kern_ctf.c | 35 +++
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/sys/kern/kern_ctf.c b/sys/kern/kern_ctf.c
index b525c274f9e0..1087406ff82e 100644
--- a/sys/kern/kern_ctf.c
+++ b/sys/kern/kern_ctf.c
@@ -144,9 +144,12 @@ link_elf_ctf_get(linker_file_t lf, linker_ctf_t *lc)
 * .SUNW_ctf section containing the CTF data.
 */
if (hdr->e_shstrndx == 0 || shdr[hdr->e_shstrndx].sh_type != 
SHT_STRTAB) {
-   printf("%s(%d): module %s e_shstrndx is %d, sh_type is %d\n",
-   __func__, __LINE__, lf->pathname, hdr->e_shstrndx,
-   shdr[hdr->e_shstrndx].sh_type);
+   if (bootverbose) {
+   printf(
+   "%s(%d): module %s e_shstrndx is %d, sh_type is 
%d\n",
+   __func__, __LINE__, lf->pathname, hdr->e_shstrndx,
+   shdr[hdr->e_shstrndx].sh_type);
+   }
error = EFTYPE;
goto out;
}
@@ -167,8 +170,10 @@ link_elf_ctf_get(linker_file_t lf, linker_ctf_t *lc)
 
/* Check if the CTF section wasn't found. */
if (i >= hdr->e_shnum) {
-   printf("%s(%d): module %s has no .SUNW_ctf section\n",
-   __func__, __LINE__, lf->pathname);
+   if (bootverbose) {
+   printf("%s(%d): module %s has no .SUNW_ctf section\n",
+   __func__, __LINE__, lf->pathname);
+   }
error = EFTYPE;
goto out;
}
@@ -181,17 +186,21 @@ link_elf_ctf_get(linker_file_t lf, linker_ctf_t *lc)
 
/* Check the CTF magic number. */
if (cth.cth_magic != CTF_MAGIC) {
-   printf("%s(%d): module %s has invalid format\n",
-   __func__, __LINE__, lf->pathname);
+   if (bootverbose) {
+   printf("%s(%d): module %s has invalid format\n",
+   __func__, __LINE__, lf->pathname);
+   }
error = EFTYPE;
goto out;
}
 
if (cth.cth_version != CTF_VERSION_2 &&
cth.cth_version != CTF_VERSION_3) {
-   printf(
-   "%s(%d): module %s CTF format has unsupported version %d\n",
-   __func__, __LINE__, lf->pathname, cth.cth_version);
+   if (bootverbose) {
+   printf(
+   "%s(%d): module %s CTF format has unsupported 
version %d\n",
+   __func__, __LINE__, lf->pathname, cth.cth_version);
+   }
error = EFTYPE;
goto out;
}
@@ -250,8 +259,10 @@ link_elf_ctf_get(linker_file_t lf, linker_ctf_t *lc)
ret = uncompress(ctftab + sizeof(cth), &destlen,
raw + sizeof(cth), shdr[i].sh_size - sizeof(cth));
if (ret != Z_OK) {
-   printf("%s(%d): zlib uncompress returned %d\n",
-   __func__, __LINE__, ret);
+   if (bootverbose) {
+   printf("%s(%d): zlib uncompress returned %d\n",
+   __func__, __LINE__, ret);
+   }
error = EIO;
goto out;
}



git: aada453dcbaa - main - ddb: Properly pretty-print non-labeled enum values

2024-04-03 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=aada453dcbaab1b8f7d50b66add5a38eb9e06cc3

commit aada453dcbaab1b8f7d50b66add5a38eb9e06cc3
Author: Bojan Novković 
AuthorDate: 2024-04-03 15:47:00 +
Commit: Bojan Novković 
CommitDate: 2024-04-03 16:17:11 +

ddb: Properly pretty-print non-labeled enum values

The ddb pretty-printer currently does not print out enum values that
are not labeled (e.g. X | Y).
The enum printer was reworked to print non-labeled values.

Reported by:jrtc27
Fixes:  c21bc6f ("ddb: Add CTF-based pretty printing")
Approved by:markj (mentor)
---
 sys/ddb/db_pprint.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/sys/ddb/db_pprint.c b/sys/ddb/db_pprint.c
index 8aa14550f068..b4116372cf65 100644
--- a/sys/ddb/db_pprint.c
+++ b/sys/ddb/db_pprint.c
@@ -225,13 +225,14 @@ db_pprint_enum(db_addr_t addr, struct ctf_type_v3 *type, 
u_int depth)
for (; ep < endp; ep++) {
if (val == ep->cte_value) {
valname = db_ctf_stroff_to_str(&sym_data, ep->cte_name);
-   if (valname != NULL)
-   db_printf("%s (0x%lx)", valname, (long)val);
-   else
-   db_printf("(0x%lx)", (long)val);
-   break;
+   if (valname != NULL) {
+   db_printf("%s (0x%lx)", valname, val);
+   break;
+   }
}
}
+   if (ep == endp)
+   db_printf("0x%lx", val);
 }
 
 /*



git: 872c4402af13 - main - ddb: Don't throw away qualifier when pretty-printing unnamed pointers

2024-04-03 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=872c4402af1325ed156d7f5ee2252dd36b016b5c

commit 872c4402af1325ed156d7f5ee2252dd36b016b5c
Author: Bojan Novković 
AuthorDate: 2024-04-03 15:55:13 +
Commit: Bojan Novković 
CommitDate: 2024-04-03 16:17:54 +

ddb: Don't throw away qualifier when pretty-printing unnamed pointers

Reported by:jrtc27
Fixes:  c21bc6f ("ddb: Add CTF-based pretty printing")
Approved by:markj (mentor)
---
 sys/ddb/db_pprint.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/ddb/db_pprint.c b/sys/ddb/db_pprint.c
index b4116372cf65..8e6759a03220 100644
--- a/sys/ddb/db_pprint.c
+++ b/sys/ddb/db_pprint.c
@@ -276,7 +276,7 @@ db_pprint_ptr(db_addr_t addr, struct ctf_type_v3 *type, 
u_int depth)
if (name != NULL)
db_printf("(%s%s *) 0x%lx", qual, name, (long)val);
else
-   db_printf("0x%lx", (long)val);
+   db_printf("(%s *) 0x%lx", qual, (long)val);
}
 }
 



git: a02f9685edd1 - main - vm_meter: Add counter for NOFREE pages

2024-10-07 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=a02f9685edd168ef51e2e6fd98f09c9b866fa9a9

commit a02f9685edd168ef51e2e6fd98f09c9b866fa9a9
Author: Bojan Novković 
AuthorDate: 2024-10-07 14:56:08 +
Commit: Bojan Novković 
CommitDate: 2024-10-07 16:46:32 +

vm_meter: Add counter for NOFREE pages

This change adds a new counter that tracks the total number
of permanently allocated pages.

Differential Revision:  https://reviews.freebsd.org/D46978
Reviewed by:alc, markj
---
 sys/sys/vmmeter.h | 8 
 sys/vm/vm_meter.c | 2 ++
 sys/vm/vm_page.c  | 1 +
 3 files changed, 11 insertions(+)

diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index 36321be22205..ac4d2f7e7c90 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -120,6 +120,7 @@ struct vmmeter {
counter_u64_t v_rforkpages; /* (p) pages affected by rfork() */
counter_u64_t v_kthreadpages;   /* (p) ... and by kernel fork() */
counter_u64_t v_wire_count; /* (p) pages wired down */
+   counter_u64_t v_nofree_count;   /* (p) permanently allocated pages */
 #defineVM_METER_NCOUNTERS  \
(offsetof(struct vmmeter, v_page_size) / sizeof(counter_u64_t))
/*
@@ -174,6 +175,13 @@ vm_wire_count(void)
return (VM_CNT_FETCH(v_wire_count));
 }
 
+static inline u_int
+vm_nofree_count(void)
+{
+
+   return (VM_CNT_FETCH(v_nofree_count));
+}
+
 /*
  * Return TRUE if we are under our severe low-free-pages threshold
  *
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 7348577fc3cb..faf4074ef0c6 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -90,6 +90,7 @@ struct vmmeter __read_mostly vm_cnt = {
.v_rforkpages = EARLY_COUNTER,
.v_kthreadpages = EARLY_COUNTER,
.v_wire_count = EARLY_COUNTER,
+   .v_nofree_count = EARLY_COUNTER,
 };
 
 u_long __exclusive_cache_line vm_user_wire_count;
@@ -386,6 +387,7 @@ VM_STATS_UINT(v_free_target, "Pages desired free");
 VM_STATS_UINT(v_free_min, "Minimum low-free-pages threshold");
 VM_STATS_PROC(v_free_count, "Free pages", vm_free_count);
 VM_STATS_PROC(v_wire_count, "Wired pages", vm_wire_count);
+VM_STATS_PROC(v_nofree_count, "Permanently allocated pages", vm_nofree_count);
 VM_STATS_PROC(v_active_count, "Active pages", vm_active_count);
 VM_STATS_UINT(v_inactive_target, "Desired inactive pages");
 VM_STATS_PROC(v_inactive_count, "Inactive pages", vm_inactive_count);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 6256472e0336..67a9c2119ab8 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2594,6 +2594,7 @@ vm_page_alloc_nofree_domain(int domain, int req)
}
m = &nqp->ma[nqp->offs++];
vm_domain_free_unlock(vmd);
+   VM_CNT_ADD(v_nofree_count, 1);
 
return (m);
 }



git: 596a36ddc478 - main - vmstat: Add NOFREE page count to -s report

2024-10-07 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=596a36ddc47889e1b3bde00566cef2d8695c2847

commit 596a36ddc47889e1b3bde00566cef2d8695c2847
Author: Bojan Novković 
AuthorDate: 2024-10-07 15:02:42 +
Commit: Bojan Novković 
CommitDate: 2024-10-07 16:46:32 +

vmstat: Add NOFREE page count to -s report

This change adds the number of permanently allocated pages
to the 'sum' structure report.

Differential Revision:  https://reviews.freebsd.org/D46980
Reviewed by:markj
---
 usr.bin/vmstat/vmstat.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/usr.bin/vmstat/vmstat.c b/usr.bin/vmstat/vmstat.c
index 8c7790403786..8d6c40a4d2fd 100644
--- a/usr.bin/vmstat/vmstat.c
+++ b/usr.bin/vmstat/vmstat.c
@@ -142,6 +142,7 @@ static struct __vmmeter {
u_int v_free_count;
u_int v_wire_count;
u_long v_user_wire_count;
+   u_int v_nofree_count;
u_int v_active_count;
u_int v_inactive_target;
u_int v_inactive_count;
@@ -558,6 +559,7 @@ fill_vmmeter(struct __vmmeter *vmmp)
GET_VM_STATS(vm, v_free_count);
GET_VM_STATS(vm, v_wire_count);
GET_VM_STATS(vm, v_user_wire_count);
+   GET_VM_STATS(vm, v_nofree_count);
GET_VM_STATS(vm, v_active_count);
GET_VM_STATS(vm, v_inactive_target);
GET_VM_STATS(vm, v_inactive_count);
@@ -1004,6 +1006,8 @@ dosum(void)
sum.v_wire_count);
xo_emit("{:virtual-user-wired-pages/%9lu} {N:virtual user pages wired "
"down}\n", sum.v_user_wire_count);
+   xo_emit("{:nofree-pages/%9u} {N:permanently allocated pages}\n",
+   sum.v_nofree_count);
xo_emit("{:free-pages/%9u} {N:pages free}\n",
sum.v_free_count);
xo_emit("{:bytes-per-page/%9u} {N:bytes per page}\n", sum.v_page_size);



git: 149e1af6ae49 - main - vm_kern: Use VM_ALLOC_NOFREE when allocating 'zero_region' page

2024-10-05 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=149e1af6ae4936fac0a907d4c62d745c179b4dc5

commit 149e1af6ae4936fac0a907d4c62d745c179b4dc5
Author: Bojan Novković 
AuthorDate: 2024-10-05 15:05:40 +
Commit: Bojan Novković 
CommitDate: 2024-10-05 15:05:40 +

 vm_kern: Use VM_ALLOC_NOFREE when allocating 'zero_region' page

 Allocate the 'zero_region' page using VM_ALLOC_NOFREE since
 it never gets released.

 Differential Revision:  https://reviews.freebsd.org/D46885
 Reviewed by:alc, markj, kib
---
 sys/vm/vm_kern.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index fb7c80b767ed..22776e2196b0 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -762,7 +762,8 @@ kmem_init_zero_region(void)
 * zeros, while not using much more physical resources.
 */
addr = kva_alloc(ZERO_REGION_SIZE);
-   m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+   m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO |
+   VM_ALLOC_NOFREE);
for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE)
pmap_qenter(addr + i, &m, 1);
pmap_protect(kernel_pmap, addr, addr + ZERO_REGION_SIZE, VM_PROT_READ);



git: 29a6f8fd93c2 - main - vm: Use VM_ALLOC_NOFREE when allocating bogus_page

2024-09-22 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=29a6f8fd93c278f0c7d7b2248068bb0353027e4a

commit 29a6f8fd93c278f0c7d7b2248068bb0353027e4a
Author: Bojan Novković 
AuthorDate: 2024-09-22 13:31:45 +
Commit: Bojan Novković 
CommitDate: 2024-09-22 14:02:53 +

vm: Use VM_ALLOC_NOFREE when allocating bogus_page

Allocate the 'bogus_page' page using VM_ALLOC_NOFREE since
it never gets released.

Differential Revision:  https://reviews.freebsd.org/D46699
Reviewed by:alc, markj, kib
---
 sys/vm/vm_page.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index ba32a9eb9e63..40108e8a9b0a 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -194,7 +194,7 @@ vm_page_init(void *dummy)
 
fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
-   bogus_page = vm_page_alloc_noobj(VM_ALLOC_WIRED);
+   bogus_page = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_NOFREE);
 }
 
 static int pgcache_zone_max_pcpu;



git: 51fda658baa3 - main - vmm: Properly handle writes spanning across two pages in vm_handle_db

2024-10-02 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=51fda658baa3f80c9778f3a9873fbf67df87119b

commit 51fda658baa3f80c9778f3a9873fbf67df87119b
Author: Bojan Novković 
AuthorDate: 2024-09-29 11:10:10 +
Commit: Bojan Novković 
CommitDate: 2024-10-02 16:43:36 +

vmm: Properly handle writes spanning across two pages in vm_handle_db

The vm_handle_db function is responsible for writing correct status
register values into memory when a guest VM is being single-stepped
using the RFLAGS.TF mechanism. However, it currently does not properly
handle an edge case where the resulting write spans across two pages.
This commit fixes this by making vm_handle_db use two vm_copyinfo
structs.

Security:   HYP-09
Reviewed by:markj
---
 sys/amd64/vmm/vmm.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index a2c2b342bee4..5484d71cefd2 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1795,7 +1795,7 @@ vm_handle_db(struct vcpu *vcpu, struct vm_exit *vme, bool 
*retu)
int error, fault;
uint64_t rsp;
uint64_t rflags;
-   struct vm_copyinfo copyinfo;
+   struct vm_copyinfo copyinfo[2];
 
*retu = true;
if (!vme->u.dbg.pushf_intercept || vme->u.dbg.tf_shadow_val != 0) {
@@ -1804,21 +1804,21 @@ vm_handle_db(struct vcpu *vcpu, struct vm_exit *vme, 
bool *retu)
 
vm_get_register(vcpu, VM_REG_GUEST_RSP, &rsp);
error = vm_copy_setup(vcpu, &vme->u.dbg.paging, rsp, sizeof(uint64_t),
-   VM_PROT_RW, &copyinfo, 1, &fault);
+   VM_PROT_RW, copyinfo, nitems(copyinfo), &fault);
if (error != 0 || fault != 0) {
*retu = false;
return (EINVAL);
}
 
/* Read pushed rflags value from top of stack. */
-   vm_copyin(&copyinfo, &rflags, sizeof(uint64_t));
+   vm_copyin(copyinfo, &rflags, sizeof(uint64_t));
 
/* Clear TF bit. */
rflags &= ~(PSL_T);
 
/* Write updated value back to memory. */
-   vm_copyout(&rflags, &copyinfo, sizeof(uint64_t));
-   vm_copy_teardown(&copyinfo, 1);
+   vm_copyout(&rflags, copyinfo, sizeof(uint64_t));
+   vm_copy_teardown(copyinfo, nitems(copyinfo));
 
return (0);
 }



git: b42b18fb24f5 - main - x86: Add definitions for XSAVE state component information

2024-11-26 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=b42b18fb24f58d9b3d8b60d6901e582d407521d6

commit b42b18fb24f58d9b3d8b60d6901e582d407521d6
Author: Bojan Novković 
AuthorDate: 2024-11-26 17:48:46 +
Commit: Bojan Novković 
CommitDate: 2024-11-26 18:16:05 +

x86: Add definitions for XSAVE state component information

Reviewed by: kib
---
 sys/x86/include/specialreg.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
index 51c513b6a5ab..9dc30e31e540 100644
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -386,6 +386,14 @@
 #defineCPUID_EXTSTATE_XINUSE   0x0004
 #defineCPUID_EXTSTATE_XSAVES   0x0008
 
+/*
+ * CPUID instruction 0xd Processor Extended State Enumeration
+ * Sub-leaf > 1 ecx info
+ */
+#defineCPUID_EXTSTATE_SUPERVISOR   0x0001
+#defineCPUID_EXTSTATE_ALIGNED  0x0002
+#defineCPUID_EXTSTATE_XFD_SUPPORTED0x0004
+
 /*
  * AMD extended function 8000_0007h ebx info
  */



git: e17e33f997d6 - main - sdhci: Refactor the generic FDT driver

2025-02-07 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e17e33f997d63107e3a6859cfe3c19eba041b424

commit e17e33f997d63107e3a6859cfe3c19eba041b424
Author: Bojan Novković 
AuthorDate: 2025-01-18 19:02:51 +
Commit: Bojan Novković 
CommitDate: 2025-02-07 09:06:07 +

sdhci: Refactor the generic FDT driver

This patch refactors the 'sdhci_fdt.c' driver by moving all vendor
specific routines into separate files and making the base 'sdhci_fdt'
driver subclassable. The goal is to make adding new FDT-based drivers
easier and more maintainable. No functional change intended.

Reviewed by:manu, imp
Differential Revision:  https://reviews.freebsd.org/D48527
---
 sys/arm/rockchip/files.rk32xx  |   1 +
 sys/arm64/conf/std.xilinx  |   1 +
 sys/conf/files.arm64   |   3 +
 sys/dev/sdhci/sdhci_fdt.c  | 273 +--
 sys/dev/sdhci/sdhci_fdt.h  |  66 +
 sys/dev/sdhci/sdhci_fdt_rockchip.c | 283 +
 sys/dev/sdhci/sdhci_fdt_xilinx.c   | 115 +++
 7 files changed, 507 insertions(+), 235 deletions(-)

diff --git a/sys/arm/rockchip/files.rk32xx b/sys/arm/rockchip/files.rk32xx
index a9ca6cb1b5ae..7331b12a06ed 100644
--- a/sys/arm/rockchip/files.rk32xx
+++ b/sys/arm/rockchip/files.rk32xx
@@ -27,3 +27,4 @@ dev/iicbus/pmic/act8846_regulator.c   standard
 dev/iicbus/pmic/fan53555.c standard
 dev/iicbus/rtc/hym8563.c   standard
 dev/mmc/host/dwmmc_rockchip.c  optionaldwmmc
+dev/sdhci/sdhci_fdt_rockchip.c optionalsdhci
diff --git a/sys/arm64/conf/std.xilinx b/sys/arm64/conf/std.xilinx
index 50ebf5ade53b..2283616e8cdf 100644
--- a/sys/arm64/conf/std.xilinx
+++ b/sys/arm64/conf/std.xilinx
@@ -15,6 +15,7 @@ devicecgem# Cadence GEM 
Gigabit Ethernet device
 
 # MMC/SD/SDIO Card slot support
 device sdhci
+device sdhci_xilinx
 
 # IICBUS
 device cdnc_i2c
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 4b73ebd1e6db..43da6e757b1c 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -436,6 +436,9 @@ dev/sdhci/sdhci_xenon.c 
optional sdhci_xenon sdhci
 dev/sdhci/sdhci_xenon_acpi.c   optional sdhci_xenon sdhci acpi
 dev/sdhci/sdhci_xenon_fdt.coptional sdhci_xenon sdhci fdt
 
+dev/sdhci/sdhci_fdt_xilinx.c   optional sdhci_xilinx sdhci fdt
+dev/sdhci/sdhci_fdt_rockchip.c optional sdhci fdt soc_rockchip
+
 dev/sram/mmio_sram.c   optional fdt mmio_sram
 dev/sram/mmio_sram_if.moptional fdt mmio_sram
 
diff --git a/sys/dev/sdhci/sdhci_fdt.c b/sys/dev/sdhci/sdhci_fdt.c
index 1d8013ee7088..efc12b54e10f 100644
--- a/sys/dev/sdhci/sdhci_fdt.c
+++ b/sys/dev/sdhci/sdhci_fdt.c
@@ -47,106 +47,37 @@
 
 #include 
 #include 
-#include 
 
-#include 
 #include 
 #include 
+#include 
+
+#include 
+#include 
 #include 
 #include 
 
 #include 
 
 #include 
+#include 
 
 #include "mmcbr_if.h"
 #include "sdhci_if.h"
 
 #include "opt_mmccam.h"
 
-#include "clkdev_if.h"
-#include "syscon_if.h"
-
-#defineMAX_SLOTS   6
 #defineSDHCI_FDT_ARMADA38X 1
 #defineSDHCI_FDT_XLNX_ZY7  2
 #defineSDHCI_FDT_QUALCOMM  3
-#defineSDHCI_FDT_RK33994
-#defineSDHCI_FDT_RK35685
-#defineSDHCI_FDT_XLNX_ZMP  6
-
-#defineRK3399_GRF_EMMCCORE_CON00xf000
-#define RK3399_CORECFG_BASECLKFREQ 0xff00
-#define RK3399_CORECFG_TIMEOUTCLKUNIT  (1 << 7)
-#define RK3399_CORECFG_TUNINGCOUNT 0x3f
-#defineRK3399_GRF_EMMCCORE_CON11   0xf02c
-#define RK3399_CORECFG_CLOCKMULTIPLIER 0xff
-
-#defineRK3568_EMMC_HOST_CTRL   0x0508
-#defineRK3568_EMMC_EMMC_CTRL   0x052c
-#defineRK3568_EMMC_ATCTRL  0x0540
-#defineRK3568_EMMC_DLL_CTRL0x0800
-#define DLL_CTRL_SRST  0x0001
-#define DLL_CTRL_START 0x0002
-#define DLL_CTRL_START_POINT_DEFAULT   0x0005
-#define DLL_CTRL_INCREMENT_DEFAULT 0x0200
-
-#defineRK3568_EMMC_DLL_RXCLK   0x0804
-#define DLL_RXCLK_DELAY_ENABLE 0x0800
-#define DLL_RXCLK_NO_INV   0x2000
-
-#defineRK3568_EMMC_DLL_TXCLK   0x0808
-#define DLL_TXCLK_DELAY_ENABLE 0x0800
-#define DLL_TXCLK_TAPNUM_DEFAULT   0x0008
-#define DLL_TXCLK_TAPN

git: 0c4fa0bdcf87 - main - x86: Add definitions for some Intel Processor Trace bits

2024-12-15 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=0c4fa0bdcf87bee66d749c7550da852717522bdf

commit 0c4fa0bdcf87bee66d749c7550da852717522bdf
Author: Bojan Novković 
AuthorDate: 2024-12-15 14:03:34 +
Commit: Bojan Novković 
CommitDate: 2024-12-15 15:39:36 +

x86: Add definitions for some Intel Processor Trace bits

This patch adds definitions for Intel PT-related MSRs and
several PT feature bits.

Reviewed by:kib, markj
Differential Revision:  https://reviews.freebsd.org/D46419
---
 sys/x86/include/specialreg.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
index 9dc30e31e540..e9dde5c3b46a 100644
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -123,6 +123,7 @@
 #defineXFEATURE_ENABLED_OPMASK 0x0020
 #defineXFEATURE_ENABLED_ZMM_HI256  0x0040
 #defineXFEATURE_ENABLED_HI16_ZMM   0x0080
+#defineXFEATURE_ENABLED_PT 0x0100
 #defineXFEATURE_ENABLED_PKRU   0x0200
 #defineXFEATURE_ENABLED_TILECONFIG 0x0002
 #defineXFEATURE_ENABLED_TILEDATA   0x0004
@@ -213,6 +214,7 @@
 #defineCPUPT_MTC   (1 << 3)/* MTC Supported */
 #defineCPUPT_PRW   (1 << 4)/* PTWRITE Supported */
 #defineCPUPT_PWR   (1 << 5)/* Power Event Trace 
Supported */
+#defineCPUPT_DIS_TNT   (1 << 8)/* TNT disable 
supported */
 
 /* Leaf 0 ecx. */
 #defineCPUPT_TOPA  (1 << 0)/* ToPA Output 
Supported */
@@ -654,6 +656,12 @@
 #defineMSR_PAT 0x277
 #defineMSR_MC0_CTL20x280
 #defineMSR_MTRRdefType 0x2ff
+#defineMSR_IA_GLOBAL_STATUS0x38E
+#defineMSR_IA_GLOBAL_CTRL  0x38F
+#defineMSR_IA_GLOBAL_OVF_CTRL  0x390
+#defineMSR_IA_GLOBAL_STATUS_RESET  0x390
+#defineMSR_IA_GLOBAL_STATUS_SET0x391
+#define GLOBAL_STATUS_FLAG_TRACETOPAPMI(1ULL << 55)
 #defineMSR_MC0_CTL 0x400
 #defineMSR_MC0_STATUS  0x401
 #defineMSR_MC0_ADDR0x402
@@ -781,6 +789,7 @@
 #define RTIT_CTL_ADDR2_CFG_M   (0xfULL << RTIT_CTL_ADDR2_CFG_S)
 #define RTIT_CTL_ADDR3_CFG_S   44
 #define RTIT_CTL_ADDR3_CFG_M   (0xfULL << RTIT_CTL_ADDR3_CFG_S)
+#defineRTIT_CTL_DIS_TNT(1ULL << 55)
 #defineMSR_IA32_RTIT_STATUS0x571   /* Tracing Status 
Register (R/W) */
 #define RTIT_STATUS_FILTEREN   (1 << 0)
 #define RTIT_STATUS_CONTEXTEN  (1 << 1)



git: 04e832672159 - main - x86: Allow sharing of performance counter interrupts

2024-12-15 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=04e832672159cae412e8984e0b0cabfa6e7428b7

commit 04e832672159cae412e8984e0b0cabfa6e7428b7
Author: Bojan Novković 
AuthorDate: 2024-12-15 14:00:19 +
Commit: Bojan Novković 
CommitDate: 2024-12-15 15:39:36 +

x86: Allow sharing of performance counter interrupts

This patch refactors the Performance Counter interrupt setup code to
allow sharing the interrupt line between multiple drivers.
More specifically, Performance Counter interrupts are used by both
hwpmc(4) and hwt(4)'s upcoming Intel Processor Trace backend.

Reviewed by:kib
Differential Revision:  https://reviews.freebsd.org/D46420
---
 sys/dev/hwpmc/hwpmc_core.c |  4 ++--
 sys/dev/hwpmc/hwpmc_x86.c  |  4 ++--
 sys/x86/include/apicvar.h  |  6 +++---
 sys/x86/x86/local_apic.c   | 33 ++---
 4 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/sys/dev/hwpmc/hwpmc_core.c b/sys/dev/hwpmc/hwpmc_core.c
index bf224ded126f..83784b93718e 100644
--- a/sys/dev/hwpmc/hwpmc_core.c
+++ b/sys/dev/hwpmc/hwpmc_core.c
@@ -1051,7 +1051,7 @@ core_intr(struct trapframe *tf)
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
if (found_interrupt)
-   lapic_reenable_pmc();
+   lapic_reenable_pcint();
 
return (found_interrupt);
 }
@@ -1150,7 +1150,7 @@ core2_intr(struct trapframe *tf)
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
if (found_interrupt)
-   lapic_reenable_pmc();
+   lapic_reenable_pcint();
 
/*
 * Reenable all non-stalled PMCs.
diff --git a/sys/dev/hwpmc/hwpmc_x86.c b/sys/dev/hwpmc/hwpmc_x86.c
index 1d04a6610674..54cc919eec30 100644
--- a/sys/dev/hwpmc/hwpmc_x86.c
+++ b/sys/dev/hwpmc/hwpmc_x86.c
@@ -242,7 +242,7 @@ pmc_md_initialize(void)
return (NULL);
 
/* disallow sampling if we do not have an LAPIC */
-   if (md != NULL && !lapic_enable_pmc())
+   if (md != NULL && !lapic_enable_pcint())
for (i = 0; i < md->pmd_nclass; i++) {
if (i == PMC_CLASS_INDEX_SOFT)
continue;
@@ -256,7 +256,7 @@ void
 pmc_md_finalize(struct pmc_mdep *md)
 {
 
-   lapic_disable_pmc();
+   lapic_disable_pcint();
if (cpu_vendor_id == CPU_VENDOR_AMD ||
cpu_vendor_id == CPU_VENDOR_HYGON)
pmc_amd_finalize(md);
diff --git a/sys/x86/include/apicvar.h b/sys/x86/include/apicvar.h
index fc9bb0123539..c537d0ee0cdd 100644
--- a/sys/x86/include/apicvar.h
+++ b/sys/x86/include/apicvar.h
@@ -231,9 +231,9 @@ voidapic_enable_vector(u_int apic_id, u_int vector);
 void   apic_disable_vector(u_int apic_id, u_int vector);
 void   apic_free_vector(u_int apic_id, u_int vector, u_int irq);
 void   lapic_calibrate_timer(void);
-intlapic_enable_pmc(void);
-void   lapic_disable_pmc(void);
-void   lapic_reenable_pmc(void);
+intlapic_enable_pcint(void);
+void   lapic_disable_pcint(void);
+void   lapic_reenable_pcint(void);
 void   lapic_enable_cmc(void);
 intlapic_enable_mca_elvt(void);
 void   lapic_ipi_raw(register_t icrlo, u_int dest);
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index 6a913883cc5c..86cbe9a050dc 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -35,7 +35,6 @@
 
 #include 
 #include "opt_atpic.h"
-#include "opt_hwpmc_hooks.h"
 
 #include "opt_ddb.h"
 
@@ -50,6 +49,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -206,6 +206,7 @@ static uint64_t lapic_ipi_wait_mult;
 static int __read_mostly lapic_ds_idle_timeout = 100;
 #endif
 unsigned int max_apic_id;
+static int pcint_refcnt = 0;
 
 SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
 "APIC options");
@@ -809,20 +810,19 @@ lapic_intrcnt(void *dummy __unused)
 SYSINIT(lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, lapic_intrcnt, NULL);
 
 void
-lapic_reenable_pmc(void)
+lapic_reenable_pcint(void)
 {
-#ifdef HWPMC_HOOKS
uint32_t value;
 
+   if (refcount_load(&pcint_refcnt) == 0)
+   return;
value = lapic_read32(LAPIC_LVT_PCINT);
value &= ~APIC_LVT_M;
lapic_write32(LAPIC_LVT_PCINT, value);
-#endif
 }
 
-#ifdef HWPMC_HOOKS
 static void
-lapic_update_pmc(void *dummy)
+lapic_update_pcint(void *dummy)
 {
struct lapic *la;
 
@@ -830,7 +830,6 @@ lapic_update_pmc(void *dummy)
lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC,
lapic_read32(LAPIC_LVT_PCINT)));
 }
-#endif
 
 void
 lapic_calibrate_timer(void)
@@ -858,9 +857,8 @@ lapic_calibrate_timer(void)
 }
 
 int
-lapic_enable_pmc(void)
+lapic_enable_pcint(void)
 {
-#ifdef HWPMC_HOOKS
u_int32_t maxlvt;
 
 #ifdef DEV_ATPIC
@@ -873,21 +871,18 @@ lapic_enable_pmc(void)

git: d5ce54dddf49 - main - hwpmc_x86: Register interrupt handler using the dynamic NMI registration interface

2024-12-15 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=d5ce54dddf4927a2edd3e57ee67722dfccb567a8

commit d5ce54dddf4927a2edd3e57ee67722dfccb567a8
Author: Bojan Novković 
AuthorDate: 2024-12-15 14:06:58 +
Commit: Bojan Novković 
CommitDate: 2024-12-15 15:39:36 +

hwpmc_x86: Register interrupt handler using the dynamic NMI registration 
interface

Register the PCINT handler using the nmi_{register, remove}_handler
interfaces (introduced in D46421) in preparation for hwt(4)'s
Intel Processor Trace backend. No functional change intended.

Reviewed by:kib
Differential Revision:  https://reviews.freebsd.org/D47989
---
 sys/dev/hwpmc/hwpmc_x86.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sys/dev/hwpmc/hwpmc_x86.c b/sys/dev/hwpmc/hwpmc_x86.c
index 54cc919eec30..2c6c4cd148bf 100644
--- a/sys/dev/hwpmc/hwpmc_x86.c
+++ b/sys/dev/hwpmc/hwpmc_x86.c
@@ -248,6 +248,7 @@ pmc_md_initialize(void)
continue;
md->pmd_classdep[i].pcd_caps &= ~PMC_CAP_INTERRUPT;
}
+   nmi_register_handler(md->pmd_intr);
 
return (md);
 }
@@ -257,6 +258,7 @@ pmc_md_finalize(struct pmc_mdep *md)
 {
 
lapic_disable_pcint();
+   nmi_remove_handler(md->pmd_intr);
if (cpu_vendor_id == CPU_VENDOR_AMD ||
cpu_vendor_id == CPU_VENDOR_HYGON)
pmc_amd_finalize(md);



git: 593e874e6124 - main - amd64: Add wrappers for XRSTORS and XSAVES

2024-12-15 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=593e874e61249c845ca83e9284e4d9061643e8fb

commit 593e874e61249c845ca83e9284e4d9061643e8fb
Author: Bojan Novković 
AuthorDate: 2024-12-15 14:02:13 +
Commit: Bojan Novković 
CommitDate: 2024-12-15 15:39:36 +

amd64: Add wrappers for XRSTORS and XSAVES

Reviewed by:kib
Differential Revision:  https://reviews.freebsd.org/D46984
---
 sys/amd64/include/cpufunc.h | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index ca53d73b0186..d180f5c76afb 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -942,6 +942,29 @@ sgx_eremove(void *epc)
return (sgx_encls(SGX_EREMOVE, 0, (uint64_t)epc, 0));
 }
 
+static __inline void
+xrstors(uint8_t *save_area, uint64_t state_bitmap)
+{
+   uint32_t low, hi;
+
+   low = state_bitmap;
+   hi = state_bitmap >> 32;
+   __asm __volatile("xrstors %0" : : "m"(*save_area), "a"(low),
+   "d"(hi));
+}
+
+static __inline void
+xsaves(uint8_t *save_area, uint64_t state_bitmap)
+{
+   uint32_t low, hi;
+
+   low = state_bitmap;
+   hi = state_bitmap >> 32;
+   __asm __volatile("xsaves %0" : "=m"(*save_area) : "a"(low),
+   "d"(hi)
+   : "memory");
+}
+
 void   reset_dbregs(void);
 
 #ifdef _KERNEL



git: 459dc427873c - main - x86: Refactor kernel-mode NMI handling

2024-12-15 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=459dc427873c9a294387ec74a96e6f7824de7435

commit 459dc427873c9a294387ec74a96e6f7824de7435
Author: Bojan Novković 
AuthorDate: 2024-12-15 13:56:40 +
Commit: Bojan Novković 
CommitDate: 2024-12-15 15:39:36 +

x86: Refactor kernel-mode NMI handling

This refactor aims to add the ability to share performance counter
interrupts by refactoring the kernel-mode NMI handler. The handler now
allows multiple drivers to service the same interrupt (e.g. hwpmc(4)
and hwt(4)'s Intel Processor Trace backend).

Reviewed by:kib, avg
Differential Revision:  https://reviews.freebsd.org/D46421
---
 sys/amd64/amd64/trap.c| 32 ++--
 sys/i386/i386/trap.c  | 26 +++--
 sys/x86/include/x86_var.h |  4 +-
 sys/x86/x86/cpu_machdep.c | 95 +--
 4 files changed, 104 insertions(+), 53 deletions(-)

diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 6ceeea41ea91..4590be501d64 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -230,38 +230,22 @@ trap(struct trapframe *frame)
VM_CNT_INC(v_trap);
type = frame->tf_trapno;
 
-#ifdef SMP
-   /* Handler for NMI IPIs used for stopping CPUs. */
-   if (type == T_NMI && ipi_nmi_handler() == 0)
-   return;
-#endif
-
 #ifdef KDB
if (kdb_active) {
kdb_reenter();
return;
}
 #endif
+   if (type == T_NMI) {
+   nmi_handle_intr(frame);
+   return;
+   }
 
if (type == T_RESERVED) {
trap_fatal(frame, 0);
return;
}
 
-   if (type == T_NMI) {
-#ifdef HWPMC_HOOKS
-   /*
-* CPU PMCs interrupt using an NMI.  If the PMC module is
-* active, pass the 'rip' value to the PMC module's interrupt
-* handler.  A non-zero return value from the handler means that
-* the NMI was consumed by it and we can return immediately.
-*/
-   if (pmc_intr != NULL &&
-   (*pmc_intr)(frame) != 0)
-   return;
-#endif
-   }
-
if ((frame->tf_rflags & PSL_I) == 0) {
/*
 * Buggy application or kernel code has disabled
@@ -392,10 +376,6 @@ trap(struct trapframe *frame)
signo = SIGFPE;
break;
 
-   case T_NMI:
-   nmi_handle_intr(type, frame);
-   return;
-
case T_OFLOW:   /* integer overflow fault */
ucode = FPE_INTOVF;
signo = SIGFPE;
@@ -619,10 +599,6 @@ trap(struct trapframe *frame)
return;
 #endif
break;
-
-   case T_NMI:
-   nmi_handle_intr(type, frame);
-   return;
}
 
trap_fatal(frame, 0);
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 693e3a2f94b4..9e310c049daa 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -237,12 +237,6 @@ trap(struct trapframe *frame)
KASSERT((read_eflags() & PSL_I) == 0,
("trap: interrupts enabled, type %d frame %p", type, frame));
 
-#ifdef SMP
-   /* Handler for NMI IPIs used for stopping CPUs. */
-   if (type == T_NMI && ipi_nmi_handler() == 0)
-   return;
-#endif /* SMP */
-
 #ifdef KDB
if (kdb_active) {
kdb_reenter();
@@ -251,24 +245,14 @@ trap(struct trapframe *frame)
 #endif
trap_check_kstack();
 
-   if (type == T_RESERVED) {
-   trap_fatal(frame, 0);
+   if (type == T_NMI) {
+   nmi_handle_intr(frame);
return;
}
 
-   if (type == T_NMI) {
-#ifdef HWPMC_HOOKS
-   /*
-* CPU PMCs interrupt using an NMI so we check for that first.
-* If the HWPMC module is active, 'pmc_hook' will point to
-* the function to be called.  A non-zero return value from the
-* hook means that the NMI was consumed by it and that we can
-* return immediately.
-*/
-   if (pmc_intr != NULL &&
-   (*pmc_intr)(frame) != 0)
-   return;
-#endif
+   if (type == T_RESERVED) {
+   trap_fatal(frame, 0);
+   return;
}
 
if (type == T_MCHK) {
diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h
index 6609871bf89e..dbb4e9557ed0 100644
--- a/sys/x86/include/x86_var.h
+++ b/sys/x86/include/x86_var.h
@@ -148,7 +148,9 @@ voidzenbleed_sanitize_enable(void);
 void   zenbleed_check_and_app

git: 7bcaff05223e - main - x86: Add routines for querying XSAVE feature information

2024-12-15 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7bcaff05223eb81611372e341a120391925fa724

commit 7bcaff05223eb81611372e341a120391925fa724
Author: Bojan Novković 
AuthorDate: 2024-12-15 14:04:58 +
Commit: Bojan Novković 
CommitDate: 2024-12-15 15:39:36 +

x86: Add routines for querying XSAVE feature information

This patch adds several routines that track and expose information
about various XSAVE-related features. More specifically, it adds the
ability to check whether a given XFEATURE is supported and which XSAVE
extensions are supported. Furthermore, it adds several routines for
calculating the size of, and offsets within, a save area given an XSAVE
feature bitmap.

Reviewed by:    kib
Differential Revision:  https://reviews.freebsd.org/D47394
---
 sys/amd64/amd64/fpu.c | 111 +-
 sys/x86/include/fpu.h |   6 +++
 2 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 58a135e827a8..591bd196ca7d 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -164,12 +164,14 @@ SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
 
 int use_xsave; /* non-static for cpu_switch.S */
 uint64_t xsave_mask;   /* the same */
+static uint64_t xsave_extensions;
 static uma_zone_t fpu_save_area_zone;
 static struct savefpu *fpu_initialstate;
 
 static struct xsave_area_elm_descr {
u_int   offset;
u_int   size;
+   u_int   flags;
 } *xsave_area_desc;
 
 static void
@@ -452,6 +454,9 @@ fpuinitstate(void *arg __unused)
 * Region of an XSAVE Area" for the source of offsets/sizes.
 */
if (use_xsave) {
+   cpuid_count(0xd, 1, cp);
+   xsave_extensions = cp[0];
+
xstate_bv = (uint64_t *)((char *)(fpu_initialstate + 1) +
offsetof(struct xstate_hdr, xstate_bv));
*xstate_bv = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
@@ -465,8 +470,9 @@ fpuinitstate(void *arg __unused)
 
for (i = 2; i < max_ext_n; i++) {
cpuid_count(0xd, i, cp);
-   xsave_area_desc[i].offset = cp[1];
xsave_area_desc[i].size = cp[0];
+   xsave_area_desc[i].offset = cp[1];
+   xsave_area_desc[i].flags = cp[2];
}
}
 
@@ -1285,3 +1291,106 @@ fpu_save_area_reset(struct savefpu *fsa)
 
bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size);
 }
+
+static __inline void
+xsave_extfeature_check(uint64_t feature)
+{
+
+   KASSERT((feature & (feature - 1)) == 0,
+   ("%s: invalid XFEATURE 0x%lx", __func__, feature));
+   KASSERT(feature < flsl(xsave_mask),
+   ("%s: unsupported XFEATURE 0x%lx", __func__, feature));
+}
+
+static __inline void
+xsave_extstate_bv_check(uint64_t xstate_bv)
+{
+   KASSERT(xstate_bv != 0 && ilog2(xstate_bv) < flsl(xsave_mask),
+   ("%s: invalid XSTATE_BV 0x%lx", __func__, xstate_bv));
+}
+
+/*
+ * Returns whether the XFEATURE 'feature' is supported as a user state
+ * or supervisor state component.
+ */
+bool
+xsave_extfeature_supported(uint64_t feature, bool supervisor)
+{
+   int idx;
+
+   KASSERT(use_xsave, ("%s: XSAVE not supported", __func__));
+   xsave_extfeature_check(feature);
+
+   if ((xsave_mask & feature) == 0)
+   return (false);
+   idx = ilog2(feature);
+   return (((xsave_area_desc[idx].flags & CPUID_EXTSTATE_SUPERVISOR) != 0) 
==
+   supervisor);
+}
+
+/*
+ * Returns whether the given XSAVE extension is supported.
+ */
+bool
+xsave_extension_supported(uint64_t extension)
+{
+   KASSERT(use_xsave, ("%s: XSAVE not supported", __func__));
+
+   return ((xsave_extensions & extension) != 0);
+}
+
+/*
+ * Returns offset for XFEATURE 'feature' given the requested feature bitmap
+ * 'xstate_bv', and extended region format ('compact').
+ */
+size_t
+xsave_area_offset(uint64_t xstate_bv, uint64_t feature,
+bool compact)
+{
+   int i, idx;
+   size_t offs;
+   struct xsave_area_elm_descr *xep;
+
+   KASSERT(use_xsave, ("%s: XSAVE not supported", __func__));
+   xsave_extstate_bv_check(xstate_bv);
+   xsave_extfeature_check(feature);
+
+   idx = ilog2(feature);
+   if (!compact)
+   return (xsave_area_desc[idx].offset);
+   offs = sizeof(struct savefpu) + sizeof(struct xstate_hdr);
+   xstate_bv &= ~(XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE);
+   while ((i = ffs(xstate_bv) - 1) > 0 && i < idx) {
+   xep = &xsave_area_desc[i];
+   if ((xep->flags & CPUID_EXTSTATE_ALIGNED) != 0)
+

git: 3342e5967dc7 - main - i386: Fix incorrect NMI handler invocations

2024-12-15 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=3342e5967dc7193d97f99a92b81824db81efe2f1

commit 3342e5967dc7193d97f99a92b81824db81efe2f1
Author: Bojan Novković 
AuthorDate: 2024-12-15 17:44:34 +
Commit: Bojan Novković 
CommitDate: 2024-12-15 17:47:52 +

i386: Fix incorrect NMI handler invocations

Fixes:  459dc42
---
 sys/i386/i386/trap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 9e310c049daa..a8b7df42a283 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -428,7 +428,7 @@ user_trctrap_out:
}
return;
 #else /* !POWERFAIL_NMI */
-   nmi_handle_intr(type, frame);
+   nmi_handle_intr(frame);
return;
 #endif /* POWERFAIL_NMI */
 
@@ -685,7 +685,7 @@ kernel_trctrap:
}
return;
 #else /* !POWERFAIL_NMI */
-   nmi_handle_intr(type, frame);
+   nmi_handle_intr(frame);
return;
 #endif /* POWERFAIL_NMI */
}



git: b9951017bab3 - main - amd64/fpu: Track supervisor state XSAVE components

2025-01-22 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=b9951017bab396e24042e85632e2cc34ee0329ff

commit b9951017bab396e24042e85632e2cc34ee0329ff
Author: Bojan Novković 
AuthorDate: 2025-01-15 16:41:24 +
Commit: Bojan Novković 
CommitDate: 2025-01-22 12:58:34 +

amd64/fpu: Track supervisor state XSAVE components

The amd64/fpu.c xsave_* routines track supported XSAVE components and
features. However, they only track supported user state components, and
there is currently no way for a consumer to check whether the CPU
supports a supervisor state component. Fix this by saving the supported
supervisor state components, enumerated by CPUID function 0DH,
sub-function 1, in a separate mask.

Reviewed by:    kib
Differential Revision:  https://reviews.freebsd.org/D48466
---
 sys/amd64/amd64/fpu.c | 38 --
 sys/x86/include/fpu.h |  5 +++--
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 591bd196ca7d..79d1722268b7 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -164,6 +164,7 @@ SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
 
 int use_xsave; /* non-static for cpu_switch.S */
 uint64_t xsave_mask;   /* the same */
+static uint64_t xsave_mask_supervisor;
 static uint64_t xsave_extensions;
 static uma_zone_t fpu_save_area_zone;
 static struct savefpu *fpu_initialstate;
@@ -324,6 +325,7 @@ fpuinit_bsp1(void)
ctx_switch_xsave[3] |= 0x10;
restore_wp(old_wp);
}
+   xsave_mask_supervisor = ((uint64_t)cp[3] << 32) | cp[2];
 }
 
 /*
@@ -421,7 +423,7 @@ fpuinitstate(void *arg __unused)
XSAVE_AREA_ALIGN - 1, 0);
fpu_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO);
if (use_xsave) {
-   max_ext_n = flsl(xsave_mask);
+   max_ext_n = flsl(xsave_mask | xsave_mask_supervisor);
xsave_area_desc = malloc(max_ext_n * sizeof(struct
xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
}
@@ -1293,19 +1295,25 @@ fpu_save_area_reset(struct savefpu *fsa)
 }
 
 static __inline void
-xsave_extfeature_check(uint64_t feature)
+xsave_extfeature_check(uint64_t feature, bool supervisor)
 {
+   uint64_t mask;
 
+   mask = supervisor ? xsave_mask_supervisor : xsave_mask;
KASSERT((feature & (feature - 1)) == 0,
("%s: invalid XFEATURE 0x%lx", __func__, feature));
-   KASSERT(feature < flsl(xsave_mask),
-   ("%s: unsupported XFEATURE 0x%lx", __func__, feature));
+   KASSERT(ilog2(feature) <= ilog2(mask),
+   ("%s: unsupported %s XFEATURE 0x%lx", __func__,
+   supervisor ? "supervisor" : "user", feature));
 }
 
 static __inline void
-xsave_extstate_bv_check(uint64_t xstate_bv)
+xsave_extstate_bv_check(uint64_t xstate_bv, bool supervisor)
 {
-   KASSERT(xstate_bv != 0 && ilog2(xstate_bv) < flsl(xsave_mask),
+   uint64_t mask;
+
+   mask = supervisor ? xsave_mask_supervisor : xsave_mask;
+   KASSERT(xstate_bv != 0 && ilog2(xstate_bv) <= ilog2(mask),
("%s: invalid XSTATE_BV 0x%lx", __func__, xstate_bv));
 }
 
@@ -1317,11 +1325,13 @@ bool
 xsave_extfeature_supported(uint64_t feature, bool supervisor)
 {
int idx;
+   uint64_t mask;
 
KASSERT(use_xsave, ("%s: XSAVE not supported", __func__));
-   xsave_extfeature_check(feature);
+   xsave_extfeature_check(feature, supervisor);
 
-   if ((xsave_mask & feature) == 0)
+   mask = supervisor ? xsave_mask_supervisor : xsave_mask;
+   if ((mask & feature) == 0)
return (false);
idx = ilog2(feature);
return (((xsave_area_desc[idx].flags & CPUID_EXTSTATE_SUPERVISOR) != 0) 
==
@@ -1345,15 +1355,15 @@ xsave_extension_supported(uint64_t extension)
  */
 size_t
 xsave_area_offset(uint64_t xstate_bv, uint64_t feature,
-bool compact)
+bool compact, bool supervisor)
 {
int i, idx;
size_t offs;
struct xsave_area_elm_descr *xep;
 
KASSERT(use_xsave, ("%s: XSAVE not supported", __func__));
-   xsave_extstate_bv_check(xstate_bv);
-   xsave_extfeature_check(feature);
+   xsave_extstate_bv_check(xstate_bv, supervisor);
+   xsave_extfeature_check(feature, supervisor);
 
idx = ilog2(feature);
if (!compact)
@@ -1376,16 +1386,16 @@ xsave_area_offset(uint64_t xstate_bv, uint64_t feature,
  * 'xstate_bv' and extended region format ('compact').
  */
 size_t
-xsave_area_size(uint64_t xstate_bv, bool compact)
+xsave_area_size(uint64_t xstate_bv, bool compact, bool supervisor)
 {
int last_idx;
 
KASSERT(use_xsave, ("%s: XSAVE not suppo

git: f0d036cf6655 - main - amd64/fpu: Fix build for NODEBUG kernels

2025-01-22 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=f0d036cf665520cead4970b4337d72b077ed5eea

commit f0d036cf665520cead4970b4337d72b077ed5eea
Author: Bojan Novković 
AuthorDate: 2025-01-22 15:02:17 +
Commit: Bojan Novković 
CommitDate: 2025-01-22 15:15:32 +

amd64/fpu: Fix build for NODEBUG kernels

Fixes:  b995101
Reported by:    Michael Butler (i...@protected-networks.net)
---
 sys/amd64/amd64/fpu.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 79d1722268b7..48bfaa53c7b4 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -1297,6 +1297,7 @@ fpu_save_area_reset(struct savefpu *fsa)
 static __inline void
 xsave_extfeature_check(uint64_t feature, bool supervisor)
 {
+#ifdef INVARIANTS
uint64_t mask;
 
mask = supervisor ? xsave_mask_supervisor : xsave_mask;
@@ -1305,16 +1306,19 @@ xsave_extfeature_check(uint64_t feature, bool 
supervisor)
KASSERT(ilog2(feature) <= ilog2(mask),
("%s: unsupported %s XFEATURE 0x%lx", __func__,
supervisor ? "supervisor" : "user", feature));
+#endif
 }
 
 static __inline void
 xsave_extstate_bv_check(uint64_t xstate_bv, bool supervisor)
 {
+#ifdef INVARIANTS
uint64_t mask;
 
mask = supervisor ? xsave_mask_supervisor : xsave_mask;
KASSERT(xstate_bv != 0 && ilog2(xstate_bv) <= ilog2(mask),
("%s: invalid XSTATE_BV 0x%lx", __func__, xstate_bv));
+#endif
 }
 
 /*



git: 19f202f859b1 - main - sdhci: Fixes for sdhci_fdt_rockchip.c

2025-02-16 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=19f202f859b101507e26fe181aaf9f0b99f4d59c

commit 19f202f859b101507e26fe181aaf9f0b99f4d59c
Author: Bojan Novković 
AuthorDate: 2025-02-16 10:41:43 +
Commit: Bojan Novković 
CommitDate: 2025-02-16 10:41:43 +

sdhci: Fixes for sdhci_fdt_rockchip.c

This change fixes a couple of issues in the Rockchip SDHCI driver:
 - Fix a panic caused by sdhci_fdt_rockchip_attach not populating the
   softc's dev variable before initializing clocks
 - Fix a bug where sdhci_fdt_rockchip_set_clock fails to call
   sdhci_fdt_set_clock

Fixes:  e17e33f997d6
Reported by:    Alonso Cárdenas Márquez (acarde...@bsd-peru.org)
---
 sys/dev/sdhci/sdhci_fdt_rockchip.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/sys/dev/sdhci/sdhci_fdt_rockchip.c 
b/sys/dev/sdhci/sdhci_fdt_rockchip.c
index b3311d3e8a48..44a5e2ffe271 100644
--- a/sys/dev/sdhci/sdhci_fdt_rockchip.c
+++ b/sys/dev/sdhci/sdhci_fdt_rockchip.c
@@ -217,7 +217,7 @@ sdhci_fdt_rockchip_set_clock(device_t dev, struct 
sdhci_slot *slot, int clock)
DLL_STRBIN_TAPNUM_FROM_SW);
}
}
-   return (sdhci_fdt_rockchip_set_clock(dev, slot, clock));
+   return (sdhci_fdt_set_clock(dev, slot, clock));
 }
 
 static int
@@ -226,6 +226,7 @@ sdhci_fdt_rockchip_attach(device_t dev)
struct sdhci_fdt_softc *sc = device_get_softc(dev);
int err, compat;
 
+   sc->dev = dev;
compat = ofw_bus_search_compatible(dev, compat_data)->ocd_data;
switch (compat) {
case SDHCI_FDT_RK3399:
@@ -243,12 +244,10 @@ sdhci_fdt_rockchip_attach(device_t dev)
device_printf(dev, "Cannot get syscon handle\n");
return (err);
}
-   if (compat == SDHCI_FDT_RK3399) {
-   err = sdhci_init_rk3399(dev);
-   if (err != 0) {
-   device_printf(dev, "Cannot init RK3399 
SDHCI\n");
-   return (err);
-   }
+   err = sdhci_init_rk3399(dev);
+   if (err != 0) {
+   device_printf(dev, "Cannot init RK3399 SDHCI\n");
+   return (err);
}
break;
case SDHCI_FDT_RK3568:



git: df436036e82b - main - umtx: Don't sleep after casueword32 failure in do_sem2_wake

2025-02-17 Thread Bojan Novković
The branch main has been updated by bnovkov:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=df436036e82b895a6233d803bc8bf14d2cfe90d7

commit df436036e82b895a6233d803bc8bf14d2cfe90d7
Author: Bojan Novković 
AuthorDate: 2025-01-30 15:10:04 +
Commit: Bojan Novković 
CommitDate: 2025-02-17 16:40:34 +

umtx: Don't sleep after casueword32 failure in do_sem2_wake

When a casueword32 operation fails, 'do_sem2_wake' will call
'thread_check_susp' to avoid a potential livelock. However, it
instructs 'thread_check_susp' to sleep while holding a previously busied
umtxq key. This is explicitly discouraged by the comments in
'thread_check_susp' which state that a thread shouldn't sleep if it
owns a kernel resource. Fix this by passing 'false'
to 'thread_check_susp'.

Reviewed by:    kib
PR: 282713
Differential Revision:  https://reviews.freebsd.org/D48728
Sponsored by:   Klara Inc.
---
 sys/kern/kern_umtx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c
index a9294c324cb4..938dcf2ff1cb 100644
--- a/sys/kern/kern_umtx.c
+++ b/sys/kern/kern_umtx.c
@@ -3789,7 +3789,7 @@ do_sem2_wake(struct thread *td, struct _usem2 *sem)
rv = casueword32(&sem->_count, count, &count,
count & ~USEM_HAS_WAITERS);
if (rv == 1) {
-   rv = thread_check_susp(td, true);
+   rv = thread_check_susp(td, false);
if (rv != 0)
break;
}