Disregard my speculation below about a possible 32-bit int issue. I've
reproduced the panic with a smaller pr_nget value.

On Thu, Jun 6, 2024 at 11:37 PM Eric Grosse <gro...@gmail.com> wrote:
>
> Is the large (greater than 2^32) value of pr_nget below
> exceptional? My crashes only happen for long-running
> heavy workloads so a big value seems plausible but
> maybe there is some limit I'm supposed to reconfigure
> for such workloads?
>
> panic: pmap_enter: failed to allocate pted
> Stopped at      panic+0x134:    ori r0,r0,0x0
>     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
>
>  234577  13111   8889  0x18000001          0    3  go
>
>  491287  13050   8889   0xa001007          0    2  asm
>  331460  14455   8889  0x1a000003          0    5  asm
>   66317  80047   8889  0x1a000003          0    1  compile
>  331134  57997   8889  0x1a000003  0x4000000    0  go
>  240615  91938   8889  0x1a000003  0x4000000    6  go
> *247303  89969   8889  0x1a000003  0x4000000    4K go
>   28756  33586      0     0x14000      0x200    7  pagedaemon
> panic+0x134
>
> pmap_enter+0x218
>
> uvm_km_kmemalloc_pla+0x1f4
> uvm_uarea_alloc+0x70
>
> thread_fork+0xd8
>
> sys___tfork+0xc4
> syscall+0x530
>
> trap+0x5dc
>
> trapagain+0x4
> --- syscall (number 8) ---
> End of kernel: 0x457c99db0 lr 0x4bddd2604
>
>
> ddb{4}>show struct pool pmap_pted_pool
>
> struct pmap_pted_pool at 0x1089a98 (424 bytes) {pr_lock = {prl_mtx = 
> {mtx_owner
>  = (void *)0x0, mtx_wantipl = 7, mtx_oldipl = 0}, prl_rwlock = {rwl_owner = 
> 0, r
> wl_name = (const unsigned char *)0x700000000}}, pr_lock_ops = (const 
> pool_lock_
>
> ops *)0xfc4950, pr_poollist = {sqe_next = (struct pool *)0x10898f0}, 
> pr_emptypa
> ges = {tqh_first = (struct pool_page_header *)0x0, tqh_last = 0x1089ab8}, 
> pr_fu
> llpages = {tqh_first = (struct pool_page_header *)0xc00000003e3b2f90, 
> tqh_last =
>  0xc00000014b344f90}, pr_partpages = {tqh_first = (struct pool_page_header 
> *)0x
> c000000140250f90, tqh_last = 0xc000000146821f90}, pr_curpage = (struct 
> pool_pag
> e_header *)0x0, pr_size = 56, pr_minitems = 20, pr_minpages = 1, pr_maxpages 
> = 8
> , pr_npages = 4632, pr_itemsperpage = 71, pr_slack = 0, pr_nitems = 328872, 
> pr_
> nout = 328870, pr_hardlimit = 4294967295, pr_serial = 14, pr_pgsize = 4096, 
> pr_
> pgmask = 18446744073709547520, pr_alloc = (struct pool_allocator *)0xfe9b40, 
> pr
> _wchan = (const unsigned char *)0xeb08e4, pr_flags = 0, pr_ipl = 7, pr_phtree 
> =
>  {rbh_root = {rbt_root = (struct rb_entry *)0x0}}, pr_cache = (struct cpumem 
> *)
> 0x0, pr_cache_magic = [0,0], pr_cache_lock = {prl_mtx = {mtx_owner = (void 
> *)0x
> 0, mtx_wantipl = 0, mtx_oldipl = 0}, prl_rwlock = {rwl_owner = 0, rwl_name = 
> (c
> onst unsigned char *)0x0}}, pr_cache_lists = {tqh_first = (struct 
> pool_cache_it
> em *)0x0, tqh_last = 0x0}, pr_cache_nitems = 0, pr_cache_items = 0, 
> pr_cache_co
> ntention = 0, pr_cache_contention_prev = 0, pr_cache_timestamp = 0, 
> pr_cache_ng
> c = 0, pr_cache_nout = 0, pr_align = 8, pr_maxcolors = 2, pr_phoffset = 3984, 
> p
> r_hardlimit_warning = (const unsigned char *)0x0, pr_hardlimit_ratecap = 
> {tv_se
>
> c = 0, tv_usec = 0}, pr_hardlimit_warning_last = {tv_sec = 0, tv_usec = 0}, 
> pr_
> requests_lock = {prl_mtx = {mtx_owner = (void *)0x0, mtx_wantipl = 7, 
> mtx_oldip
> l = 0}, prl_rwlock = {rwl_owner = 0, rwl_name = (const unsigned char 
> *)0x700000
> 000}}, pr_requests = {tqh_first = (struct pool_request *)0x0, tqh_last = 
> 0x1089
> be8}, pr_requesting = 0, pr_nget = 5523479116, pr_nfail = 13, pr_nput = 
> 5523150
> 246, pr_npagealloc = 1387529, pr_npagefree = 1382897, pr_hiwat = 10072, 
> pr_nidl
> e = 0, pr_crange = (const kmem_pa_mode *)0xfc60b0}
>
>
> ddb{4}>show struct pool pmap_vp_pool
>
> struct pmap_vp_pool at 0x10898f0 (424 bytes) {pr_lock = {prl_mtx = {mtx_owner 
> =
>  (void *)0x0, mtx_wantipl = 7, mtx_oldipl = 7}, prl_rwlock = {rwl_owner = 0, 
> rw
> l_name = (const unsigned char *)0x700000007}}, pr_lock_ops = (const 
> pool_lock_o
>
> ps *)0xfc4950, pr_poollist = {sqe_next = (struct pool *)0x1089c40}, 
> pr_emptypag
> es = {tqh_first = (struct pool_page_header *)0xc0000001449f7af0, tqh_last = 
> 0xc
> 000000142a861d8}, pr_fullpages = {tqh_first = (struct pool_page_header 
> *)0xc000
> 00003e3bbd20, tqh_last = 0xc00000014d3f9248}, pr_partpages = {tqh_first = 
> (stru
> ct pool_page_header *)0xc00000003c3e9250, tqh_last = 0xc00000014a4e4d28}, 
> pr_cu
> rpage = (struct pool_page_header *)0xc00000014a4e4d28, pr_size = 2048, 
> pr_minit
> ems = 10, pr_minpages = 5, pr_maxpages = 8, pr_npages = 17150, 
> pr_itemsperpage =
>  2, pr_slack = 0, pr_nitems = 34300, pr_nout = 34255, pr_hardlimit = 
> 4294967295
> , pr_serial = 13, pr_pgsize = 4096, pr_pgmask = 18446744073709547520, 
> pr_alloc =
>
>  (struct pool_allocator *)0xfe9b40, pr_wchan = (const unsigned char 
> *)0xee10ef,
>  pr_flags = 0, pr_ipl = 7, pr_phtree = {rbh_root = {rbt_root = (struct 
> rb_entry
>  *)0xc00000013f618a08}}, pr_cache = (struct cpumem *)0x0, pr_cache_magic = 
> [0,0
> ], pr_cache_lock = {prl_mtx = {mtx_owner = (void *)0x0, mtx_wantipl = 0, 
> mtx_ol
> dipl = 0}, prl_rwlock = {rwl_owner = 0, rwl_name = (const unsigned char 
> *)0x0}}
> , pr_cache_lists = {tqh_first = (struct pool_cache_item *)0x0, tqh_last = 
> 0x0},
>  pr_cache_nitems = 0, pr_cache_items = 0, pr_cache_contention = 0, 
> pr_cache_con
> tention_prev = 0, pr_cache_timestamp = 0, pr_cache_ngc = 0, pr_cache_nout = 
> 0, p
> r_align = 8, pr_maxcolors = 1, pr_phoffset = 0, pr_hardlimit_warning = (const 
> u
> nsigned char *)0x0, pr_hardlimit_ratecap = {tv_sec = 0, tv_usec = 0}, 
> pr_hardli
>
> mit_warning_last = {tv_sec = 0, tv_usec = 0}, pr_requests_lock = {prl_mtx = 
> {mt
> x_owner = (void *)0x0, mtx_wantipl = 7, mtx_oldipl = 0}, prl_rwlock = 
> {rwl_owne
> r = 0, rwl_name = (const unsigned char *)0x700000000}}, pr_requests = 
> {tqh_firs
> t = (struct pool_request *)0x0, tqh_last = 0x1089a40}, pr_requesting = 0, 
> pr_ng
>
> et = 147054103, pr_nfail = 0, pr_nput = 147019848, pr_npagealloc = 6817420, 
> pr_
> npagefree = 6800270, pr_hiwat = 20556, pr_nidle = 17, pr_crange = (const 
> kmem_p
>
> a_mode *)0xfc60b0}
>
>
> ddb{4}> show struct pool pmap_pmap_pool
>
> struct pmap_pmap_pool at 0x1089c40 (424 bytes) {pr_lock = {prl_mtx = 
> {mtx_owner
>  = (void *)0x0, mtx_wantipl = 7, mtx_oldipl = 0}, prl_rwlock = {rwl_owner = 
> 0, r
>
> wl_name = (const unsigned char *)0x700000000}}, pr_lock_ops = (const 
> pool_lock_
> ops *)0xfc4950, pr_poollist = {sqe_next = (struct pool *)0x104b5c0}, 
> pr_emptypa
>
> ges = {tqh_first = (struct pool_page_header *)0xc00000014ef78f90, tqh_last = 
> 0x
> c00000014ef78f90}, pr_fullpages = {tqh_first = (struct pool_page_header 
> *)0x0, t
> qh_last = 0x1089c70}, pr_partpages = {tqh_first = (struct pool_page_header 
> *)0x
> c00000003e3aef90, tqh_last = 0xc000000149c6ef90}, pr_curpage = (struct 
> pool_pag
> e_header *)0xc00000003e3aef90, pr_size = 48, pr_minitems = 2, pr_minpages = 
> 1, p
> r_maxpages = 8, pr_npages = 3, pr_itemsperpage = 83, pr_slack = 0, pr_nitems 
> = 2
> 49, pr_nout = 138, pr_hardlimit = 4294967295, pr_serial = 12, pr_pgsize = 
> 4096,
>  pr_pgmask = 18446744073709547520, pr_alloc = (struct pool_allocator 
> *)0xfe9b40
> , pr_wchan = (const unsigned char *)0xe91044, pr_flags = 0, pr_ipl = 7, 
> pr_phtr
> ee = {rbh_root = {rbt_root = (struct rb_entry *)0x0}}, pr_cache = (struct 
> cpume
> m *)0x0, pr_cache_magic = [0,0], pr_cache_lock = {prl_mtx = {mtx_owner = 
> (void *
> )0x0, mtx_wantipl = 0, mtx_oldipl = 0}, prl_rwlock = {rwl_owner = 0, rwl_name 
> =
>  (const unsigned char *)0x0}}, pr_cache_lists = {tqh_first = (struct 
> pool_cache
> _item *)0x0, tqh_last = 0x0}, pr_cache_nitems = 0, pr_cache_items = 0, 
> pr_cache
> _contention = 0, pr_cache_contention_prev = 0, pr_cache_timestamp = 0, 
> pr_cache
> _ngc = 0, pr_cache_nout = 0, pr_align = 8, pr_maxcolors = 1, pr_phoffset = 
> 3984
> , pr_hardlimit_warning = (const unsigned char *)0x0, pr_hardlimit_ratecap = 
> {tv
> _sec = 0, tv_usec = 0}, pr_hardlimit_warning_last = {tv_sec = 0, tv_usec = 
> 0}, p
> r_requests_lock = {prl_mtx = {mtx_owner = (void *)0x0, mtx_wantipl = 7, 
> mtx_old
> ipl = 0}, prl_rwlock = {rwl_owner = 0, rwl_name = (const unsigned char 
> *)0x7000
> 00000}}, pr_requests = {tqh_first = (struct pool_request *)0x0, tqh_last = 
> 0x10
> 89d90}, pr_requesting = 0, pr_nget = 1113457, pr_nfail = 0, pr_nput = 
> 1113319, p
> r_npagealloc = 3, pr_npagefree = 0, pr_hiwat = 3, pr_nidle = 1, pr_crange = 
> (co
> nst kmem_pa_mode *)0xfc60b0}
>
> ddb{4}>
>
> On Wed, Jun 5, 2024 at 12:16 PM Miod Vallat <m...@online.fr> wrote:
> >
> > > There's a corruption...
> > >
> > > > ddb{7}> show panic
> > > >  cpu6: kernel diagnostic assertion "((flags & PGO_LOCKED) != 0 && 
> > > > rw_lock_held(
> > > > uobj->vmobjlock)) || (flags & PGO_LOCKED) == 0" failed: file 
> > > > "/sys/uvm/uvm_vnod
> > > > e.c", line 953
> > > >
> > > > *cpu7: assertwaitok: non-zero mutex count: 1
> > > > ddb{7}> trace
> > > > panic+0x134
> > > > assertwaitok+0xf8
> > > > mi_switch+0x5c
> > > > sleep_finish+0x160
> > > > rw_enter+0x1cc
> > > > vm_map_lock_read_ln+0x38
> > > > uvmfault_lookup+0x114
> > > > uvm_fault_check+0x68
> > > > uvm_fault+0x12c
> > > > trap+0x7a4
> > > > trapagain+0x4
> > > > --- trap (type 0x300) ---
> > > > phtree_RBT_COMPARE+0x28
> > > > pool_do_put+0x94
> > > > pool_put+0x94
> > >            ^^^^
> > > > ...inside this pool.  Which of the 3 is it?  Can someone with a ppc64
> > > > machine figure it out?
> >
> > It's pmap_vp_pool.

Reply via email to