Disregard this speculation about a possible 32-bit int issue. I've reproduced the panic with a smaller pr_nget value.
On Thu, Jun 6, 2024 at 11:37 PM Eric Grosse <gro...@gmail.com> wrote: > > Is the large (greater than 2^32) value of pr_nget below > exceptional? My crashes only happen for long-running > heavy workloads so a big value seems plausible but > maybe there is some limit I'm supposed to reconfigure > for such workloads? > > panic: pmap_enter: failed to allocate pted > Stopped at panic+0x134: ori r0,r0,0x0 > TID PID UID PRFLAGS PFLAGS CPU COMMAND > > 234577 13111 8889 0x18000001 0 3 go > > 491287 13050 8889 0xa001007 0 2 asm > 331460 14455 8889 0x1a000003 0 5 asm > 66317 80047 8889 0x1a000003 0 1 compile > 331134 57997 8889 0x1a000003 0x4000000 0 go > 240615 91938 8889 0x1a000003 0x4000000 6 go > *247303 89969 8889 0x1a000003 0x4000000 4K go > 28756 33586 0 0x14000 0x200 7 pagedaemon > panic+0x134 > > pmap_enter+0x218 > > uvm_km_kmemalloc_pla+0x1f4 > uvm_uarea_alloc+0x70 > > thread_fork+0xd8 > > sys___tfork+0xc4 > syscall+0x530 > > trap+0x5dc > > trapagain+0x4 > --- syscall (number 8) --- > End of kernel: 0x457c99db0 lr 0x4bddd2604 > > > ddb{4}>show struct pool pmap_pted_pool > > struct pmap_pted_pool at 0x1089a98 (424 bytes) {pr_lock = {prl_mtx = > {mtx_owner > = (void *)0x0, mtx_wantipl = 7, mtx_oldipl = 0}, prl_rwlock = {rwl_owner = > 0, r > wl_name = (const unsigned char *)0x700000000}}, pr_lock_ops = (const > pool_lock_ > > ops *)0xfc4950, pr_poollist = {sqe_next = (struct pool *)0x10898f0}, > pr_emptypa > ges = {tqh_first = (struct pool_page_header *)0x0, tqh_last = 0x1089ab8}, > pr_fu > llpages = {tqh_first = (struct pool_page_header *)0xc00000003e3b2f90, > tqh_last = > 0xc00000014b344f90}, pr_partpages = {tqh_first = (struct pool_page_header > *)0x > c000000140250f90, tqh_last = 0xc000000146821f90}, pr_curpage = (struct > pool_pag > e_header *)0x0, pr_size = 56, pr_minitems = 20, pr_minpages = 1, pr_maxpages > = 8 > , pr_npages = 4632, pr_itemsperpage = 71, pr_slack = 0, pr_nitems = 328872, > pr_ > nout = 328870, pr_hardlimit = 4294967295, pr_serial = 14, 
pr_pgsize = 4096, > pr_ > pgmask = 18446744073709547520, pr_alloc = (struct pool_allocator *)0xfe9b40, > pr > _wchan = (const unsigned char *)0xeb08e4, pr_flags = 0, pr_ipl = 7, pr_phtree > = > {rbh_root = {rbt_root = (struct rb_entry *)0x0}}, pr_cache = (struct cpumem > *) > 0x0, pr_cache_magic = [0,0], pr_cache_lock = {prl_mtx = {mtx_owner = (void > *)0x > 0, mtx_wantipl = 0, mtx_oldipl = 0}, prl_rwlock = {rwl_owner = 0, rwl_name = > (c > onst unsigned char *)0x0}}, pr_cache_lists = {tqh_first = (struct > pool_cache_it > em *)0x0, tqh_last = 0x0}, pr_cache_nitems = 0, pr_cache_items = 0, > pr_cache_co > ntention = 0, pr_cache_contention_prev = 0, pr_cache_timestamp = 0, > pr_cache_ng > c = 0, pr_cache_nout = 0, pr_align = 8, pr_maxcolors = 2, pr_phoffset = 3984, > p > r_hardlimit_warning = (const unsigned char *)0x0, pr_hardlimit_ratecap = > {tv_se > > c = 0, tv_usec = 0}, pr_hardlimit_warning_last = {tv_sec = 0, tv_usec = 0}, > pr_ > requests_lock = {prl_mtx = {mtx_owner = (void *)0x0, mtx_wantipl = 7, > mtx_oldip > l = 0}, prl_rwlock = {rwl_owner = 0, rwl_name = (const unsigned char > *)0x700000 > 000}}, pr_requests = {tqh_first = (struct pool_request *)0x0, tqh_last = > 0x1089 > be8}, pr_requesting = 0, pr_nget = 5523479116, pr_nfail = 13, pr_nput = > 5523150 > 246, pr_npagealloc = 1387529, pr_npagefree = 1382897, pr_hiwat = 10072, > pr_nidl > e = 0, pr_crange = (const kmem_pa_mode *)0xfc60b0} > > > ddb{4}>show struct pool pmap_vp_pool > > struct pmap_vp_pool at 0x10898f0 (424 bytes) {pr_lock = {prl_mtx = {mtx_owner > = > (void *)0x0, mtx_wantipl = 7, mtx_oldipl = 7}, prl_rwlock = {rwl_owner = 0, > rw > l_name = (const unsigned char *)0x700000007}}, pr_lock_ops = (const > pool_lock_o > > ps *)0xfc4950, pr_poollist = {sqe_next = (struct pool *)0x1089c40}, > pr_emptypag > es = {tqh_first = (struct pool_page_header *)0xc0000001449f7af0, tqh_last = > 0xc > 000000142a861d8}, pr_fullpages = {tqh_first = (struct pool_page_header > *)0xc000 > 00003e3bbd20, tqh_last = 
0xc00000014d3f9248}, pr_partpages = {tqh_first = > (stru > ct pool_page_header *)0xc00000003c3e9250, tqh_last = 0xc00000014a4e4d28}, > pr_cu > rpage = (struct pool_page_header *)0xc00000014a4e4d28, pr_size = 2048, > pr_minit > ems = 10, pr_minpages = 5, pr_maxpages = 8, pr_npages = 17150, > pr_itemsperpage = > 2, pr_slack = 0, pr_nitems = 34300, pr_nout = 34255, pr_hardlimit = > 4294967295 > , pr_serial = 13, pr_pgsize = 4096, pr_pgmask = 18446744073709547520, > pr_alloc = > > (struct pool_allocator *)0xfe9b40, pr_wchan = (const unsigned char > *)0xee10ef, > pr_flags = 0, pr_ipl = 7, pr_phtree = {rbh_root = {rbt_root = (struct > rb_entry > *)0xc00000013f618a08}}, pr_cache = (struct cpumem *)0x0, pr_cache_magic = > [0,0 > ], pr_cache_lock = {prl_mtx = {mtx_owner = (void *)0x0, mtx_wantipl = 0, > mtx_ol > dipl = 0}, prl_rwlock = {rwl_owner = 0, rwl_name = (const unsigned char > *)0x0}} > , pr_cache_lists = {tqh_first = (struct pool_cache_item *)0x0, tqh_last = > 0x0}, > pr_cache_nitems = 0, pr_cache_items = 0, pr_cache_contention = 0, > pr_cache_con > tention_prev = 0, pr_cache_timestamp = 0, pr_cache_ngc = 0, pr_cache_nout = > 0, p > r_align = 8, pr_maxcolors = 1, pr_phoffset = 0, pr_hardlimit_warning = (const > u > nsigned char *)0x0, pr_hardlimit_ratecap = {tv_sec = 0, tv_usec = 0}, > pr_hardli > > mit_warning_last = {tv_sec = 0, tv_usec = 0}, pr_requests_lock = {prl_mtx = > {mt > x_owner = (void *)0x0, mtx_wantipl = 7, mtx_oldipl = 0}, prl_rwlock = > {rwl_owne > r = 0, rwl_name = (const unsigned char *)0x700000000}}, pr_requests = > {tqh_firs > t = (struct pool_request *)0x0, tqh_last = 0x1089a40}, pr_requesting = 0, > pr_ng > > et = 147054103, pr_nfail = 0, pr_nput = 147019848, pr_npagealloc = 6817420, > pr_ > npagefree = 6800270, pr_hiwat = 20556, pr_nidle = 17, pr_crange = (const > kmem_p > > a_mode *)0xfc60b0} > > > ddb{4}> show struct pool pmap_pmap_pool > > struct pmap_pmap_pool at 0x1089c40 (424 bytes) {pr_lock = {prl_mtx = > {mtx_owner > = (void *)0x0, 
mtx_wantipl = 7, mtx_oldipl = 0}, prl_rwlock = {rwl_owner = > 0, r > > wl_name = (const unsigned char *)0x700000000}}, pr_lock_ops = (const > pool_lock_ > ops *)0xfc4950, pr_poollist = {sqe_next = (struct pool *)0x104b5c0}, > pr_emptypa > > ges = {tqh_first = (struct pool_page_header *)0xc00000014ef78f90, tqh_last = > 0x > c00000014ef78f90}, pr_fullpages = {tqh_first = (struct pool_page_header > *)0x0, t > qh_last = 0x1089c70}, pr_partpages = {tqh_first = (struct pool_page_header > *)0x > c00000003e3aef90, tqh_last = 0xc000000149c6ef90}, pr_curpage = (struct > pool_pag > e_header *)0xc00000003e3aef90, pr_size = 48, pr_minitems = 2, pr_minpages = > 1, p > r_maxpages = 8, pr_npages = 3, pr_itemsperpage = 83, pr_slack = 0, pr_nitems > = 2 > 49, pr_nout = 138, pr_hardlimit = 4294967295, pr_serial = 12, pr_pgsize = > 4096, > pr_pgmask = 18446744073709547520, pr_alloc = (struct pool_allocator > *)0xfe9b40 > , pr_wchan = (const unsigned char *)0xe91044, pr_flags = 0, pr_ipl = 7, > pr_phtr > ee = {rbh_root = {rbt_root = (struct rb_entry *)0x0}}, pr_cache = (struct > cpume > m *)0x0, pr_cache_magic = [0,0], pr_cache_lock = {prl_mtx = {mtx_owner = > (void * > )0x0, mtx_wantipl = 0, mtx_oldipl = 0}, prl_rwlock = {rwl_owner = 0, rwl_name > = > (const unsigned char *)0x0}}, pr_cache_lists = {tqh_first = (struct > pool_cache > _item *)0x0, tqh_last = 0x0}, pr_cache_nitems = 0, pr_cache_items = 0, > pr_cache > _contention = 0, pr_cache_contention_prev = 0, pr_cache_timestamp = 0, > pr_cache > _ngc = 0, pr_cache_nout = 0, pr_align = 8, pr_maxcolors = 1, pr_phoffset = > 3984 > , pr_hardlimit_warning = (const unsigned char *)0x0, pr_hardlimit_ratecap = > {tv > _sec = 0, tv_usec = 0}, pr_hardlimit_warning_last = {tv_sec = 0, tv_usec = > 0}, p > r_requests_lock = {prl_mtx = {mtx_owner = (void *)0x0, mtx_wantipl = 7, > mtx_old > ipl = 0}, prl_rwlock = {rwl_owner = 0, rwl_name = (const unsigned char > *)0x7000 > 00000}}, pr_requests = {tqh_first = (struct pool_request *)0x0, tqh_last = 
> 0x10 > 89d90}, pr_requesting = 0, pr_nget = 1113457, pr_nfail = 0, pr_nput = > 1113319, p > r_npagealloc = 3, pr_npagefree = 0, pr_hiwat = 3, pr_nidle = 1, pr_crange = > (co > nst kmem_pa_mode *)0xfc60b0} > > ddb{4}> > > On Wed, Jun 5, 2024 at 12:16 PM Miod Vallat <m...@online.fr> wrote: > > > > > There's a corruption... > > > > > > > ddb{7}> show panic > > > > cpu6: kernel diagnostic assertion "((flags & PGO_LOCKED) != 0 && > > > > rw_lock_held( > > > > uobj->vmobjlock)) || (flags & PGO_LOCKED) == 0" failed: file > > > > "/sys/uvm/uvm_vnod > > > > e.c", line 953 > > > > > > > > *cpu7: assertwaitok: non-zero mutex count: 1 > > > > ddb{7}> trace > > > > panic+0x134 > > > > assertwaitok+0xf8 > > > > mi_switch+0x5c > > > > sleep_finish+0x160 > > > > rw_enter+0x1cc > > > > vm_map_lock_read_ln+0x38 > > > > uvmfault_lookup+0x114 > > > > uvm_fault_check+0x68 > > > > uvm_fault+0x12c > > > > trap+0x7a4 > > > > trapagain+0x4 > > > > --- trap (type 0x300) --- > > > > phtree_RBT_COMPARE+0x28 > > > > pool_do_put+0x94 > > > > pool_put+0x94 > > > ^^^^ > > > ...inside this pool. Which of the 3 is it? Can someone with a ppc64 > > > figure out? > > > > It's pmap_vp_pool.