In message <201910062213.x96mdzv3085...@repo.freebsd.org>, Mateusz Guzik writes:
> Author: mjg
> Date: Sun Oct  6 22:13:35 2019
> New Revision: 353149
> URL: https://svnweb.freebsd.org/changeset/base/353149
>
> Log:
>   amd64 pmap: implement per-superpage locks
>
>   The current 256-lock sized array is a problem in the following ways:
>   - it's way too small
>   - there are 2 locks per cacheline
>   - it is not NUMA-aware
>
>   Solve these issues by introducing per-superpage locks backed by pages
>   allocated from respective domains.
>
>   This significantly reduces contention e.g. during poudriere -j 104.
>   See the review for results.
>
>   Reviewed by:    kib
>   Discussed with: jeff
>   Sponsored by:   The FreeBSD Foundation
>   Differential Revision:  https://reviews.freebsd.org/D21833
>
> Modified:
>   head/sys/amd64/amd64/pmap.c
>
> Modified: head/sys/amd64/amd64/pmap.c
> ==============================================================================
> --- head/sys/amd64/amd64/pmap.c    Sun Oct  6 20:36:25 2019    (r353148)
> +++ head/sys/amd64/amd64/pmap.c    Sun Oct  6 22:13:35 2019    (r353149)
> @@ -316,13 +316,25 @@ pmap_pku_mask_bit(pmap_t pmap)
>  #define PV_STAT(x) do { } while (0)
>  #endif
>
> -#define pa_index(pa)  ((pa) >> PDRSHIFT)
> +#undef pa_index
> +#define pa_index(pa)  ({ \
> +        KASSERT((pa) <= vm_phys_segs[vm_phys_nsegs - 1].end, \
> +            ("address %lx beyond the last segment", (pa))); \
> +        (pa) >> PDRSHIFT; \
> +})
> +#if VM_NRESERVLEVEL > 0
> +#define pa_to_pmdp(pa) (&pv_table[pa_index(pa)])
> +#define pa_to_pvh(pa)  (&(pa_to_pmdp(pa)->pv_page))
> +#define PHYS_TO_PV_LIST_LOCK(pa) \
> +        (&(pa_to_pmdp(pa)->pv_lock))
> +#else
>  #define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
>
>  #define NPV_LIST_LOCKS MAXCPU
>
>  #define PHYS_TO_PV_LIST_LOCK(pa) \
>          (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
> +#endif
>
>  #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
>          struct rwlock **_lockp = (lockp); \
> @@ -400,14 +412,22 @@ static int pmap_initialized;
>
>  /*
>   * Data for the pv entry allocation mechanism.
> - * Updates to pv_invl_gen are protected by the pv_list_locks[]
> - * elements, but reads are not.
> + * Updates to pv_invl_gen are protected by the pv list lock but reads are not.
>   */
>  static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
>  static struct mtx __exclusive_cache_line pv_chunks_mutex;
> +#if VM_NRESERVLEVEL > 0
> +struct pmap_large_md_page {
> +        struct rwlock   pv_lock;
> +        struct md_page  pv_page;
> +        u_long pv_invl_gen;
> +};
> +static struct pmap_large_md_page *pv_table;
> +#else
>  static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS];
>  static u_long pv_invl_gen[NPV_LIST_LOCKS];
>  static struct md_page *pv_table;
> +#endif
>  static struct md_page pv_dummy;
>
>  /*
> @@ -918,12 +938,21 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait_slow, CTLFLA
>      "Number of slow invalidation waits for lockless DI");
>  #endif
>
> +#if VM_NRESERVLEVEL > 0
>  static u_long *
>  pmap_delayed_invl_genp(vm_page_t m)
>  {
>
> +        return (&pa_to_pmdp(VM_PAGE_TO_PHYS(m))->pv_invl_gen);
> +}
> +#else
> +static u_long *
> +pmap_delayed_invl_genp(vm_page_t m)
> +{
> +
>          return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]);
>  }
> +#endif
>
>  static void
>  pmap_delayed_invl_callout_func(void *arg __unused)
> @@ -1803,6 +1832,112 @@ pmap_page_init(vm_page_t m)
>          m->md.pat_mode = PAT_WRITE_BACK;
>  }
>
> +#if VM_NRESERVLEVEL > 0
> +static void
> +pmap_init_pv_table(void)
> +{
> +        struct pmap_large_md_page *pvd;
> +        vm_size_t s;
> +        long start, end, highest, pv_npg;
> +        int domain, i, j, pages;
> +
> +        /*
> +         * We strongly depend on the size being a power of two, so the assert
> +         * is overzealous. However, should the struct be resized to a
> +         * different power of two, the code below needs to be revisited.
> +         */
> +        CTASSERT((sizeof(*pvd) == 64));
> +
> +        /*
> +         * Calculate the size of the array.
> +         */
> +        pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR);
> +        s = (vm_size_t)pv_npg * sizeof(struct pmap_large_md_page);
> +        s = round_page(s);
> +        pv_table = (struct pmap_large_md_page *)kva_alloc(s);
> +        if (pv_table == NULL)
> +                panic("%s: kva_alloc failed\n", __func__);
> +
> +        /*
> +         * Iterate physical segments to allocate space for respective pages.
> +         */
> +        highest = -1;
> +        s = 0;
> +        for (i = 0; i < vm_phys_nsegs; i++) {
> +                start = vm_phys_segs[i].start / NBPDR;
> +                end = vm_phys_segs[i].end / NBPDR;
> +                domain = vm_phys_segs[i].domain;
> +
> +                if (highest >= end)
> +                        continue;
> +
> +                if (start < highest) {
> +                        start = highest + 1;
> +                        pvd = &pv_table[start];
> +                } else {
> +                        /*
> +                         * The lowest address may land somewhere in the middle
> +                         * of our page. Simplify the code by pretending it is
> +                         * at the beginning.
> +                         */
> +                        pvd = pa_to_pmdp(vm_phys_segs[i].start);
> +                        pvd = (struct pmap_large_md_page *)trunc_page(pvd);
> +                        start = pvd - pv_table;
> +                }
> +
> +                pages = end - start + 1;
> +                s = round_page(pages * sizeof(*pvd));
> +                highest = start + (s / sizeof(*pvd)) - 1;
> +
> +                for (j = 0; j < s; j += PAGE_SIZE) {
> +                        vm_page_t m = vm_page_alloc_domain(NULL, 0,
> +                            domain, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
> +                        if (m == NULL)
> +                                panic("vm_page_alloc_domain failed for %lx\n",
> +                                    (vm_offset_t)pvd + j);
> +                        pmap_qenter((vm_offset_t)pvd + j, &m, 1);
> +                }
> +
> +                for (j = 0; j < s / sizeof(*pvd); j++) {
> +                        rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW);
> +                        TAILQ_INIT(&pvd->pv_page.pv_list);
> +                        pvd->pv_page.pv_gen = 0;
> +                        pvd->pv_page.pat_mode = 0;
> +                        pvd->pv_invl_gen = 0;
> +                        pvd++;
> +                }
> +        }
> +        TAILQ_INIT(&pv_dummy.pv_list);
> +}
> +#else
> +static void
> +pmap_init_pv_table(void)
> +{
> +        vm_size_t s;
> +        long i, pv_npg;
> +
> +        /*
> +         * Initialize the pool of pv list locks.
> +         */
> +        for (i = 0; i < NPV_LIST_LOCKS; i++)
> +                rw_init(&pv_list_locks[i], "pmap pv list");
> +
> +        /*
> +         * Calculate the size of the pv head table for superpages.
> +         */
> +        pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR);
> +
> +        /*
> +         * Allocate memory for the pv head table for superpages.
> +         */
> +        s = (vm_size_t)pv_npg * sizeof(struct md_page);
> +        s = round_page(s);
> +        pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
> +        for (i = 0; i < pv_npg; i++)
> +                TAILQ_INIT(&pv_table[i].pv_list);
> +        TAILQ_INIT(&pv_dummy.pv_list);
> +}
> +#endif
> +
>  /*
>   *      Initialize the pmap module.
>   *      Called by vm_init, to initialize any structures that the pmap
> @@ -1813,8 +1948,7 @@ pmap_init(void)
>  {
>          struct pmap_preinit_mapping *ppim;
>          vm_page_t m, mpte;
> -        vm_size_t s;
> -        int error, i, pv_npg, ret, skz63;
> +        int error, i, ret, skz63;
>
>          /* L1TF, reserve page @0 unconditionally */
>          vm_page_blacklist_add(0, bootverbose);
> @@ -1902,26 +2036,7 @@ pmap_init(void)
>           */
>          mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
>
> -        /*
> -         * Initialize the pool of pv list locks.
> -         */
> -        for (i = 0; i < NPV_LIST_LOCKS; i++)
> -                rw_init(&pv_list_locks[i], "pmap pv list");
> -
> -        /*
> -         * Calculate the size of the pv head table for superpages.
> -         */
> -        pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR);
> -
> -        /*
> -         * Allocate memory for the pv head table for superpages.
> -         */
> -        s = (vm_size_t)(pv_npg * sizeof(struct md_page));
> -        s = round_page(s);
> -        pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
> -        for (i = 0; i < pv_npg; i++)
> -                TAILQ_INIT(&pv_table[i].pv_list);
> -        TAILQ_INIT(&pv_dummy.pv_list);
> +        pmap_init_pv_table();
>
>          pmap_initialized = 1;
>          for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
>
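To make the shape of the change easier to see, here is a minimal sketch of what the new lookup boils down to when VM_NRESERVLEVEL > 0. This is an illustration only, not code from the tree: the helper name phys_to_pv_list_lock() is invented for this sketch (the committed code uses the PHYS_TO_PV_LIST_LOCK()/pa_to_pmdp() macros quoted above), and it assumes the usual sys/amd64/amd64/pmap.c environment for struct rwlock, struct md_page, vm_paddr_t and PDRSHIFT.

/*
 * Illustrative sketch only.  One 64-byte pmap_large_md_page exists per
 * 2 MB superpage; the array is indexed by the superpage number
 * (pa >> PDRSHIFT) instead of hashing into a shared lock array.
 */
struct pmap_large_md_page {
        struct rwlock   pv_lock;        /* per-superpage pv list lock */
        struct md_page  pv_page;        /* pv entries for that superpage */
        u_long          pv_invl_gen;    /* delayed-invalidation generation */
};

static struct pmap_large_md_page *pv_table;     /* one entry per superpage */

/* Hypothetical helper: what PHYS_TO_PV_LIST_LOCK(pa) now expands to. */
static inline struct rwlock *
phys_to_pv_list_lock(vm_paddr_t pa)
{
        /* Old scheme: &pv_list_locks[(pa >> PDRSHIFT) % NPV_LIST_LOCKS] */
        return (&pv_table[pa >> PDRSHIFT].pv_lock);
}

Reading the diff this way, each superpage owns its lock, its pv list head and its pv_invl_gen within a single 64-byte entry, and pmap_init_pv_table() backs each physical segment's slice of pv_table with pages allocated from that segment's own NUMA domain via vm_page_alloc_domain().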
This change causes a page fault during X (xdm) startup, which loads drm-current-kmod.

db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe0093e9c260
vpanic() at vpanic+0x19d/frame 0xfffffe0093e9c2b0
panic() at panic+0x43/frame 0xfffffe0093e9c310
vm_fault() at vm_fault+0x2126/frame 0xfffffe0093e9c460
vm_fault_trap() at vm_fault_trap+0x73/frame 0xfffffe0093e9c4b0
trap_pfault() at trap_pfault+0x1b6/frame 0xfffffe0093e9c510
trap() at trap+0x2a1/frame 0xfffffe0093e9c620
calltrap() at calltrap+0x8/frame 0xfffffe0093e9c620
--- trap 0xc, rip = 0xffffffff80a054b1, rsp = 0xfffffe0093e9c6f0, rbp = 0xfffffe0093e9c7a0 ---
pmap_enter() at pmap_enter+0x861/frame 0xfffffe0093e9c7a0
vm_fault() at vm_fault+0x1c69/frame 0xfffffe0093e9c8f0
vm_fault_trap() at vm_fault_trap+0x73/frame 0xfffffe0093e9c940
trap_pfault() at trap_pfault+0x1b6/frame 0xfffffe0093e9c9a0
trap() at trap+0x438/frame 0xfffffe0093e9cab0
calltrap() at calltrap+0x8/frame 0xfffffe0093e9cab0
--- trap 0xc, rip = 0x30e2a9c3, rsp = 0x7fffffffea50, rbp = 0x7fffffffeaa0 ---
Uptime: 3m33s
Dumping 945 out of 7974 MB:..2%..11%..21%..31%..41%..51%..61%..72%..82%..92%

(kgdb) bt
#0  doadump (textdump=1) at pcpu_aux.h:55
#1  0xffffffff8068c5ed in kern_reboot (howto=260)
    at /opt/src/svn-current/sys/kern/kern_shutdown.c:479
#2  0xffffffff8068caa9 in vpanic (fmt=<value optimized out>,
    ap=<value optimized out>) at /opt/src/svn-current/sys/kern/kern_shutdown.c:908
#3  0xffffffff8068c8a3 in panic (fmt=<value optimized out>)
    at /opt/src/svn-current/sys/kern/kern_shutdown.c:835
#4  0xffffffff8098c966 in vm_fault (map=<value optimized out>,
    vaddr=<value optimized out>, fault_type=<value optimized out>,
    fault_flags=<value optimized out>, m_hold=<value optimized out>)
    at /opt/src/svn-current/sys/vm/vm_fault.c:672
#5  0xffffffff8098a723 in vm_fault_trap (map=0xfffff80002001000,
    vaddr=<value optimized out>, fault_type=2 '\002',
    fault_flags=<value optimized out>, signo=0x0, ucode=0x0)
    at /opt/src/svn-current/sys/vm/vm_fault.c:568
#6  0xffffffff80a18326 in trap_pfault (frame=0xfffffe0093e9c630,
    signo=<value optimized out>, ucode=<value optimized out>)
    at /opt/src/svn-current/sys/amd64/amd64/trap.c:828
#7  0xffffffff80a177f1 in trap (frame=0xfffffe0093e9c630)
    at /opt/src/svn-current/sys/amd64/amd64/trap.c:407
#8  0xffffffff809f1aac in calltrap ()
    at /opt/src/svn-current/sys/amd64/amd64/exception.S:289
#9  0xffffffff80a054b1 in pmap_enter (pmap=<value optimized out>, va=851443712,
    m=0xfffffe0005b25ce8, prot=<value optimized out>, flags=2677542912,
    psind=<value optimized out>) at atomic.h:221
#10 0xffffffff8098c4a9 in vm_fault (map=<value optimized out>,
    vaddr=<value optimized out>, fault_type=232 '\350',
    fault_flags=<value optimized out>, m_hold=0x0)
    at /opt/src/svn-current/sys/vm/vm_fault.c:489
#11 0xffffffff8098a723 in vm_fault_trap (map=0xfffff80173eb5000,
    vaddr=<value optimized out>, fault_type=2 '\002',
    fault_flags=<value optimized out>, signo=0xfffffe0093e9ca84,
    ucode=0xfffffe0093e9ca80) at /opt/src/svn-current/sys/vm/vm_fault.c:568
#12 0xffffffff80a18326 in trap_pfault (frame=0xfffffe0093e9cac0,
    signo=<value optimized out>, ucode=<value optimized out>)
    at /opt/src/svn-current/sys/amd64/amd64/trap.c:828
#13 0xffffffff80a17988 in trap (frame=0xfffffe0093e9cac0)
    at /opt/src/svn-current/sys/amd64/amd64/trap.c:347
#14 0xffffffff809f1aac in calltrap ()
    at /opt/src/svn-current/sys/amd64/amd64/exception.S:289
#15 0x0000000030e2a9c3 in ?? ()
Previous frame inner to this frame (corrupt stack?)
Current language: auto; currently minimal
(kgdb) frame 9
#9  0xffffffff80a054b1 in pmap_enter (pmap=<value optimized out>, va=851443712,
    m=0xfffffe0005b25ce8, prot=<value optimized out>, flags=2677542912,
    psind=<value optimized out>) at atomic.h:221
221             ATOMIC_CMPSET(long);
(kgdb) l
216     }
217
218     ATOMIC_CMPSET(char);
219     ATOMIC_CMPSET(short);
220     ATOMIC_CMPSET(int);
221     ATOMIC_CMPSET(long);
222
223     /*
224      * Atomically add the value of v to the integer pointed to by p and return
225      * the previous value of *p.
(kgdb)

-- 
Cheers,
Cy Schubert <cy.schub...@cschubert.com>
FreeBSD UNIX:  <c...@freebsd.org>   Web:  http://www.FreeBSD.org

        The need of the many outweighs the greed of the few.

_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"