Module Name:	src
Committed By:	skrll
Date:		Wed Oct 26 07:35:20 UTC 2022
Modified Files:
	src/sys/arch/mips/include: pmap.h
	src/sys/arch/mips/mips: pmap_machdep.c
	src/sys/arch/powerpc/booke: booke_pmap.c trap.c
	src/sys/uvm/pmap: pmap.c pmap.h pmap_segtab.c pmap_tlb.c pmap_tlb.h

Log Message:
MI PMAP hardware page table walker support.

This is based on code given to me by Matt Thomas a long time ago, with many
updates and bug fixes from me.

To generate a diff of this commit:
cvs rdiff -u -r1.76 -r1.77 src/sys/arch/mips/include/pmap.h
cvs rdiff -u -r1.37 -r1.38 src/sys/arch/mips/mips/pmap_machdep.c
cvs rdiff -u -r1.35 -r1.36 src/sys/arch/powerpc/booke/booke_pmap.c
cvs rdiff -u -r1.38 -r1.39 src/sys/arch/powerpc/booke/trap.c
cvs rdiff -u -r1.68 -r1.69 src/sys/uvm/pmap/pmap.c
cvs rdiff -u -r1.21 -r1.22 src/sys/uvm/pmap/pmap.h
cvs rdiff -u -r1.28 -r1.29 src/sys/uvm/pmap/pmap_segtab.c
cvs rdiff -u -r1.53 -r1.54 src/sys/uvm/pmap/pmap_tlb.c
cvs rdiff -u -r1.15 -r1.16 src/sys/uvm/pmap/pmap_tlb.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
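As an illustrative aside before the diffs: the change wraps each page of PTEs
in a pmap_ptpage_t, so the segment table now holds seg_ppg[] pointers and a
PTE lookup indexes ppg_ptes with the low VA bits.  Below is a minimal,
user-space sketch of that indexing only; the trailing-underscore names and
constants (PGSHIFT_, NPTEPG_, SEGSHIFT_, the 64-entry toy segtab) are
illustrative stand-ins, not the NetBSD definitions.

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PGSHIFT_  12                         /* 4 KiB pages (illustrative) */
	#define NPTEPG_   (1 << (PGSHIFT_ - 3))      /* PTEs per page-table page */
	#define SEGSHIFT_ (PGSHIFT_ + PGSHIFT_ - 3)  /* one ptpage maps a segment */

	typedef uint64_t pt_entry_t_;

	/* Each page-table page is now a structure, as with pmap_ptpage_t. */
	typedef struct {
		pt_entry_t_ ppg_ptes[NPTEPG_];
	} ptpage_t_;

	/*
	 * Mirrors the shape of the new pmap_pte_lookup(): the segment table
	 * yields a ptpage, then the PTE index comes from the low VA bits.
	 */
	static pt_entry_t_ *
	pte_lookup_(ptpage_t_ **seg_ppg, uintptr_t va)
	{
		ptpage_t_ *ppg = seg_ppg[(va >> SEGSHIFT_) & 0x3f]; /* toy segtab */
		if (ppg == NULL)
			return NULL;
		const size_t pte_idx = (va >> PGSHIFT_) & (NPTEPG_ - 1);
		return ppg->ppg_ptes + pte_idx;
	}

	int
	main(void)
	{
		static ptpage_t_ ppg0;
		ptpage_t_ *seg_ppg[64] = { [0] = &ppg0 };
		pt_entry_t_ *ptep = pte_lookup_(seg_ppg, 0x3000);
		printf("pte index %zu\n", (size_t)(ptep - ppg0.ppg_ptes));
		return 0;
	}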
Modified files: Index: src/sys/arch/mips/include/pmap.h diff -u src/sys/arch/mips/include/pmap.h:1.76 src/sys/arch/mips/include/pmap.h:1.77 --- src/sys/arch/mips/include/pmap.h:1.76 Tue Jan 4 05:39:12 2022 +++ src/sys/arch/mips/include/pmap.h Wed Oct 26 07:35:19 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.h,v 1.76 2022/01/04 05:39:12 skrll Exp $ */ +/* $NetBSD: pmap.h,v 1.77 2022/10/26 07:35:19 skrll Exp $ */ /* * Copyright (c) 1992, 1993 @@ -158,6 +158,28 @@ pmap_md_xtab_deactivate(struct pmap *pm) #endif /* __PMAP_PRIVATE */ +// these use register_t so we can pass XKPHYS addresses to them on N32 +bool pmap_md_direct_mapped_vaddr_p(register_t); +paddr_t pmap_md_direct_mapped_vaddr_to_paddr(register_t); +bool pmap_md_io_vaddr_p(vaddr_t); + +/* + * Alternate mapping hooks for pool pages. Avoids thrashing the TLB. + */ +vaddr_t pmap_md_map_poolpage(paddr_t, size_t); +paddr_t pmap_md_unmap_poolpage(vaddr_t, size_t); +struct vm_page *pmap_md_alloc_poolpage(int); + +/* + * Other hooks for the pool allocator. + */ +paddr_t pmap_md_pool_vtophys(vaddr_t); +vaddr_t pmap_md_pool_phystov(paddr_t); +#define POOL_VTOPHYS(va) pmap_md_pool_vtophys((vaddr_t)va) +#define POOL_PHYSTOV(pa) pmap_md_pool_phystov((paddr_t)pa) + +#define pmap_md_direct_map_paddr(pa) pmap_md_pool_phystov((paddr_t)pa) + struct tlbmask { vaddr_t tlb_hi; #ifdef __mips_o32 @@ -241,26 +263,6 @@ void pmap_prefer(vaddr_t, vaddr_t *, vsi #define PMAP_ENABLE_PMAP_KMPAGE /* enable the PMAP_KMPAGE flag */ -// these use register_t so we can pass XKPHYS addresses to them on N32 -bool pmap_md_direct_mapped_vaddr_p(register_t); -paddr_t pmap_md_direct_mapped_vaddr_to_paddr(register_t); -bool pmap_md_io_vaddr_p(vaddr_t); - -/* - * Alternate mapping hooks for pool pages. Avoids thrashing the TLB. - */ -vaddr_t pmap_md_map_poolpage(paddr_t, size_t); -paddr_t pmap_md_unmap_poolpage(vaddr_t, size_t); -struct vm_page *pmap_md_alloc_poolpage(int); - -/* - * Other hooks for the pool allocator. - */ -paddr_t pmap_md_pool_vtophys(vaddr_t); -vaddr_t pmap_md_pool_phystov(paddr_t); -#define POOL_VTOPHYS(va) pmap_md_pool_vtophys((vaddr_t)va) -#define POOL_PHYSTOV(pa) pmap_md_pool_phystov((paddr_t)pa) - #ifdef MIPS64_SB1 /* uncached accesses are bad; all accesses should be cached (and coherent) */ #undef PMAP_PAGEIDLEZERO Index: src/sys/arch/mips/mips/pmap_machdep.c diff -u src/sys/arch/mips/mips/pmap_machdep.c:1.37 src/sys/arch/mips/mips/pmap_machdep.c:1.38 --- src/sys/arch/mips/mips/pmap_machdep.c:1.37 Sun Sep 25 06:21:58 2022 +++ src/sys/arch/mips/mips/pmap_machdep.c Wed Oct 26 07:35:20 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap_machdep.c,v 1.37 2022/09/25 06:21:58 skrll Exp $ */ +/* $NetBSD: pmap_machdep.c,v 1.38 2022/10/26 07:35:20 skrll Exp $ */ /*- * Copyright (c) 1998, 2001 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap_machdep.c,v 1.37 2022/09/25 06:21:58 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap_machdep.c,v 1.38 2022/10/26 07:35:20 skrll Exp $"); /* * Manages physical address maps. @@ -470,7 +470,7 @@ pmap_bootstrap(void) /* * Now set the page table pointer... 
*/ - stb->seg_tab[j] = &sysmap[i]; + stb->seg_ppg[j] = (pmap_ptpage_t *)&sysmap[i]; #ifdef _LP64 /* * If we are at end of this XSEG, terminate the loop Index: src/sys/arch/powerpc/booke/booke_pmap.c diff -u src/sys/arch/powerpc/booke/booke_pmap.c:1.35 src/sys/arch/powerpc/booke/booke_pmap.c:1.36 --- src/sys/arch/powerpc/booke/booke_pmap.c:1.35 Sun Sep 25 06:21:58 2022 +++ src/sys/arch/powerpc/booke/booke_pmap.c Wed Oct 26 07:35:20 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: booke_pmap.c,v 1.35 2022/09/25 06:21:58 skrll Exp $ */ +/* $NetBSD: booke_pmap.c,v 1.36 2022/10/26 07:35:20 skrll Exp $ */ /*- * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc. * All rights reserved. @@ -37,7 +37,7 @@ #define __PMAP_PRIVATE #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: booke_pmap.c,v 1.35 2022/09/25 06:21:58 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: booke_pmap.c,v 1.36 2022/10/26 07:35:20 skrll Exp $"); #ifdef _KERNEL_OPT #include "opt_multiprocessor.h" @@ -100,7 +100,7 @@ pmap_md_page_syncicache(struct vm_page_m * the next time page is faulted, it will get icache * synched. But this is easier. :) */ - paddr_t pa = VM_PAGE_TO_PHYS(pg); + const paddr_t pa = VM_PAGE_TO_PHYS(pg); dcache_wb_page(pa); icache_inv_page(pa); } @@ -227,11 +227,12 @@ pmap_bootstrap(vaddr_t startkernel, vadd * an extra page for the segment table and allows the user/kernel * access to be common. */ - pt_entry_t **ptp = &stp->seg_tab[VM_MIN_KERNEL_ADDRESS >> SEGSHIFT]; - pt_entry_t *ptep = (void *)kv_segtabs; - memset(ptep, 0, NBPG * kv_nsegtabs); - for (size_t i = 0; i < kv_nsegtabs; i++, ptep += NPTEPG) { - *ptp++ = ptep; + + pmap_ptpage_t **ppg_p = &stp->seg_ppg[VM_MIN_KERNEL_ADDRESS >> SEGSHIFT]; + pmap_ptpage_t *ppg = (void *)kv_segtabs; + memset(ppg, 0, NBPG * kv_nsegtabs); + for (size_t i = 0; i < kv_nsegtabs; i++, ppg++) { + *ppg_p++ = ppg; } #if PMAP_MINIMALTLB @@ -246,10 +247,10 @@ pmap_bootstrap(vaddr_t startkernel, vadd endkernel += NBPG * dm_nsegtabs; ptp = stp->seg_tab; - ptep = (void *)dm_segtabs; - memset(ptep, 0, NBPG * dm_nsegtabs); - for (size_t i = 0; i < dm_nsegtabs; i++, ptp++, ptep += NPTEPG) { - *ptp = ptep; + ppg = (void *)dm_segtabs; + memset(ppg, 0, NBPG * dm_nsegtabs); + for (size_t i = 0; i < dm_nsegtabs; i++, ptp++, ppg ++) { + *ptp = ppg; } /* @@ -308,6 +309,7 @@ pmap_bootstrap(vaddr_t startkernel, vadd struct vm_page * pmap_md_alloc_poolpage(int flags) { + /* * Any managed page works for us. */ Index: src/sys/arch/powerpc/booke/trap.c diff -u src/sys/arch/powerpc/booke/trap.c:1.38 src/sys/arch/powerpc/booke/trap.c:1.39 --- src/sys/arch/powerpc/booke/trap.c:1.38 Sun Sep 25 06:21:58 2022 +++ src/sys/arch/powerpc/booke/trap.c Wed Oct 26 07:35:20 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.38 2022/09/25 06:21:58 skrll Exp $ */ +/* $NetBSD: trap.c,v 1.39 2022/10/26 07:35:20 skrll Exp $ */ /*- * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc. * All rights reserved. 
@@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(1, "$NetBSD: trap.c,v 1.38 2022/09/25 06:21:58 skrll Exp $"); +__KERNEL_RCSID(1, "$NetBSD: trap.c,v 1.39 2022/10/26 07:35:20 skrll Exp $"); #ifdef _KERNEL_OPT #include "opt_altivec.h" @@ -148,10 +148,13 @@ trap_pte_lookup(struct trapframe *tf, va pmap_segtab_t * const stb = stbs[(tf->tf_srr1 / psl_mask) & 1]; if (__predict_false(stb == NULL)) return NULL; - pt_entry_t * const ptep = stb->seg_tab[va >> SEGSHIFT]; - if (__predict_false(ptep == NULL)) + + pmap_ptpage_t * const ppg = stb->seg_ppg[va >> SEGSHIFT]; + if (__predict_false(ppg == NULL)) return NULL; - return ptep + ((va & SEGOFSET) >> PAGE_SHIFT); + const size_t pte_idx = (va >> PGSHIFT) & (NPTEPG - 1); + + return ppg->ppg_ptes + pte_idx; } static int Index: src/sys/uvm/pmap/pmap.c diff -u src/sys/uvm/pmap/pmap.c:1.68 src/sys/uvm/pmap/pmap.c:1.69 --- src/sys/uvm/pmap/pmap.c:1.68 Sun Oct 23 06:37:15 2022 +++ src/sys/uvm/pmap/pmap.c Wed Oct 26 07:35:20 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.68 2022/10/23 06:37:15 skrll Exp $ */ +/* $NetBSD: pmap.c,v 1.69 2022/10/26 07:35:20 skrll Exp $ */ /*- * Copyright (c) 1998, 2001 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.68 2022/10/23 06:37:15 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.69 2022/10/26 07:35:20 skrll Exp $"); /* * Manages physical address maps. @@ -95,9 +95,11 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.6 * and to when physical maps must be made correct. */ +#include "opt_ddb.h" #include "opt_modular.h" #include "opt_multiprocessor.h" #include "opt_sysv.h" +#include "opt_uvmhist.h" #define __PMAP_PRIVATE @@ -194,6 +196,18 @@ PMAP_COUNTER(page_protect, "page_protect #define PMAP_ASID_RESERVED 0 CTASSERT(PMAP_ASID_RESERVED == 0); +#ifdef PMAP_HWPAGEWALKER +#ifndef PMAP_PDETAB_ALIGN +#define PMAP_PDETAB_ALIGN /* nothing */ +#endif + +#ifdef _LP64 +pmap_pdetab_t pmap_kstart_pdetab PMAP_PDETAB_ALIGN; /* first mid-level pdetab for kernel */ +#endif +pmap_pdetab_t pmap_kern_pdetab PMAP_PDETAB_ALIGN; +#endif + +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) #ifndef PMAP_SEGTAB_ALIGN #define PMAP_SEGTAB_ALIGN /* nothing */ #endif @@ -205,11 +219,17 @@ pmap_segtab_t pmap_kern_segtab PMAP_SEGT .seg_seg[(VM_MIN_KERNEL_ADDRESS >> XSEGSHIFT) & (NSEGPG - 1)] = &pmap_kstart_segtab, #endif }; +#endif struct pmap_kernel kernel_pmap_store = { .kernel_pmap = { .pm_count = 1, +#ifdef PMAP_HWPAGEWALKER + .pm_pdetab = PMAP_INVALID_PDETAB_ADDRESS, +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) .pm_segtab = &pmap_kern_segtab, +#endif .pm_minaddr = VM_MIN_KERNEL_ADDRESS, .pm_maxaddr = VM_MAX_KERNEL_ADDRESS, }, @@ -228,10 +248,10 @@ struct pmap_limits pmap_limits = { /* VA #ifdef UVMHIST static struct kern_history_ent pmapexechistbuf[10000]; static struct kern_history_ent pmaphistbuf[10000]; -static struct kern_history_ent pmapsegtabhistbuf[1000]; +static struct kern_history_ent pmapxtabhistbuf[5000]; UVMHIST_DEFINE(pmapexechist) = UVMHIST_INITIALIZER(pmapexechist, pmapexechistbuf); UVMHIST_DEFINE(pmaphist) = UVMHIST_INITIALIZER(pmaphist, pmaphistbuf); -UVMHIST_DEFINE(pmapsegtabhist) = UVMHIST_INITIALIZER(pmapsegtabhist, pmapsegtabhistbuf); +UVMHIST_DEFINE(pmapxtabhist) = UVMHIST_INITIALIZER(pmapxtabhist, pmapxtabhistbuf); #endif /* @@ -370,6 +390,7 @@ bool pmap_page_clear_attributes(struct vm_page_md *mdpg, u_int clear_attributes) { volatile unsigned long * const attrp = &mdpg->mdpg_attrs; + #ifdef MULTIPROCESSOR for (;;) 
{ u_int old_attr = *attrp; @@ -454,7 +475,6 @@ pmap_page_syncicache(struct vm_page *pg) void pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp) { - *vstartp = pmap_limits.virtual_start; *vendp = pmap_limits.virtual_end; } @@ -597,6 +617,29 @@ pmap_steal_memory(vsize_t size, vaddr_t void pmap_bootstrap_common(void) { + UVMHIST_LINK_STATIC(pmapexechist); + UVMHIST_LINK_STATIC(pmaphist); + UVMHIST_LINK_STATIC(pmapxtabhist); + + static const struct uvm_pagerops pmap_pager = { + /* nothing */ + }; + + pmap_t pm = pmap_kernel(); + + rw_init(&pm->pm_obj_lock); + uvm_obj_init(&pm->pm_uobject, &pmap_pager, false, 1); + uvm_obj_setlock(&pm->pm_uobject, &pm->pm_obj_lock); + + TAILQ_INIT(&pm->pm_ppg_list); + +#if defined(PMAP_HWPAGEWALKER) + TAILQ_INIT(&pm->pm_pdetab_list); +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + TAILQ_INIT(&pm->pm_segtab_list); +#endif + pmap_tlb_miss_lock_init(); } @@ -608,10 +651,6 @@ pmap_bootstrap_common(void) void pmap_init(void) { - UVMHIST_LINK_STATIC(pmapexechist); - UVMHIST_LINK_STATIC(pmaphist); - UVMHIST_LINK_STATIC(pmapsegtabhist); - UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist); @@ -659,6 +698,10 @@ pmap_create(void) UVMHIST_CALLED(pmaphist); PMAP_COUNT(create); + static const struct uvm_pagerops pmap_pager = { + /* nothing */ + }; + pmap_t pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); memset(pmap, 0, PMAP_SIZE); @@ -668,6 +711,18 @@ pmap_create(void) pmap->pm_minaddr = VM_MIN_ADDRESS; pmap->pm_maxaddr = VM_MAXUSER_ADDRESS; + rw_init(&pmap->pm_obj_lock); + uvm_obj_init(&pmap->pm_uobject, &pmap_pager, false, 1); + uvm_obj_setlock(&pmap->pm_uobject, &pmap->pm_obj_lock); + + TAILQ_INIT(&pmap->pm_ppg_list); +#if defined(PMAP_HWPAGEWALKER) + TAILQ_INIT(&pmap->pm_pdetab_list); +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + TAILQ_INIT(&pmap->pm_segtab_list); +#endif + pmap_segtab_init(pmap); #ifdef MULTIPROCESSOR @@ -693,11 +748,13 @@ pmap_destroy(pmap_t pmap) { UVMHIST_FUNC(__func__); UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx)", (uintptr_t)pmap, 0, 0, 0); + UVMHIST_CALLARGS(pmapxtabhist, "(pmap=%#jx)", (uintptr_t)pmap, 0, 0, 0); membar_release(); if (atomic_dec_uint_nv(&pmap->pm_count) > 0) { PMAP_COUNT(dereference); UVMHIST_LOG(pmaphist, " <-- done (deref)", 0, 0, 0, 0); + UVMHIST_LOG(pmapxtabhist, " <-- done (deref)", 0, 0, 0, 0); return; } membar_acquire(); @@ -710,6 +767,21 @@ pmap_destroy(pmap_t pmap) pmap_segtab_destroy(pmap, NULL, 0); pmap_tlb_miss_lock_exit(); + KASSERT(TAILQ_EMPTY(&pmap->pm_ppg_list)); + +#ifdef _LP64 +#if defined(PMAP_HWPAGEWALKER) + KASSERT(TAILQ_EMPTY(&pmap->pm_pdetab_list)); +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + KASSERT(TAILQ_EMPTY(&pmap->pm_segtab_list)); +#endif +#endif + KASSERT(pmap->pm_uobject.uo_npages == 0); + + uvm_obj_destroy(&pmap->pm_uobject, false); + rw_destroy(&pmap->pm_obj_lock); + #ifdef MULTIPROCESSOR kcpuset_destroy(pmap->pm_active); kcpuset_destroy(pmap->pm_onproc); @@ -721,6 +793,7 @@ pmap_destroy(pmap_t pmap) kpreempt_enable(); UVMHIST_LOG(pmaphist, " <-- done (freed)", 0, 0, 0, 0); + UVMHIST_LOG(pmapxtabhist, " <-- done (freed)", 0, 0, 0, 0); } /* @@ -1016,7 +1089,6 @@ pmap_pte_remove(pmap_t pmap, vaddr_t sva pmap_tlb_miss_lock_enter(); pte_set(ptep, npte); if (__predict_true(!(pmap->pm_flags & PMAP_DEFERRED_ACTIVATE))) { - /* * Flush the TLB for the given address. 
*/ @@ -1467,7 +1539,8 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, v pt_entry_t npte = pte_make_kenter_pa(pa, mdpg, prot, flags); kpreempt_disable(); - pt_entry_t * const ptep = pmap_pte_lookup(pmap, va); + pt_entry_t * const ptep = pmap_pte_reserve(pmap, va, 0); + KASSERTMSG(ptep != NULL, "%#"PRIxVADDR " %#"PRIxVADDR, va, pmap_limits.virtual_end); KASSERT(!pte_valid_p(*ptep)); @@ -2206,11 +2279,11 @@ pmap_pvlist_lock_addr(struct vm_page_md void * pmap_pv_page_alloc(struct pool *pp, int flags) { - struct vm_page * const pg = PMAP_ALLOC_POOLPAGE(UVM_PGA_USERESERVE); + struct vm_page * const pg = pmap_md_alloc_poolpage(UVM_PGA_USERESERVE); if (pg == NULL) return NULL; - return (void *)pmap_map_poolpage(VM_PAGE_TO_PHYS(pg)); + return (void *)pmap_md_map_poolpage(VM_PAGE_TO_PHYS(pg), PAGE_SIZE); } /* @@ -2296,3 +2369,78 @@ pmap_unmap_poolpage(vaddr_t va) return pa; } #endif /* PMAP_MAP_POOLPAGE */ + +#ifdef DDB +void +pmap_db_mdpg_print(struct vm_page *pg, void (*pr)(const char *, ...) __printflike(1, 2)) +{ + struct vm_page_md * const mdpg = VM_PAGE_TO_MD(pg); + pv_entry_t pv = &mdpg->mdpg_first; + + if (pv->pv_pmap == NULL) { + pr(" no mappings\n"); + return; + } + + int lcount = 0; + if (VM_PAGEMD_VMPAGE_P(mdpg)) { + pr(" vmpage"); + lcount++; + } + if (VM_PAGEMD_POOLPAGE_P(mdpg)) { + if (lcount != 0) + pr(","); + pr(" pool"); + lcount++; + } +#ifdef PMAP_VIRTUAL_CACHE_ALIASES + if (VM_PAGEMD_UNCACHED_P(mdpg)) { + if (lcount != 0) + pr(","); + pr(" uncached\n"); + } +#endif + pr("\n"); + + lcount = 0; + if (VM_PAGEMD_REFERENCED_P(mdpg)) { + pr(" referened"); + lcount++; + } + if (VM_PAGEMD_MODIFIED_P(mdpg)) { + if (lcount != 0) + pr(","); + pr(" modified"); + lcount++; + } + if (VM_PAGEMD_EXECPAGE_P(mdpg)) { + if (lcount != 0) + pr(","); + pr(" exec"); + lcount++; + } + pr("\n"); + + for (size_t i = 0; pv != NULL; pv = pv->pv_next) { + pr(" pv[%zu] pv=%p\n", i, pv); + pr(" pv[%zu].pv_pmap = %p", i, pv->pv_pmap); + pr(" pv[%zu].pv_va = %" PRIxVADDR " (kenter=%s)\n", + i, trunc_page(pv->pv_va), PV_ISKENTER_P(pv) ? "true" : "false"); + i++; + } +} + +void +pmap_db_pmap_print(struct pmap *pm, + void (*pr)(const char *, ...) __printflike(1, 2)) +{ +#if defined(PMAP_HWPAGEWALKER) + pr(" pm_pdetab = %p\n", pm->pm_pdetab); +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + pr(" pm_segtab = %p\n", pm->pm_segtab); +#endif + + pmap_db_tlb_print(pm, pr); +} +#endif /* DDB */ Index: src/sys/uvm/pmap/pmap.h diff -u src/sys/uvm/pmap/pmap.h:1.21 src/sys/uvm/pmap/pmap.h:1.22 --- src/sys/uvm/pmap/pmap.h:1.21 Sat May 7 06:53:16 2022 +++ src/sys/uvm/pmap/pmap.h Wed Oct 26 07:35:20 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.h,v 1.21 2022/05/07 06:53:16 rin Exp $ */ +/* $NetBSD: pmap.h,v 1.22 2022/10/26 07:35:20 skrll Exp $ */ /* * Copyright (c) 1992, 1993 @@ -74,12 +74,44 @@ #ifndef _UVM_PMAP_PMAP_H_ #define _UVM_PMAP_PMAP_H_ +#include <sys/rwlock.h> +#include <uvm/uvm_object.h> #include <uvm/uvm_stat.h> + #ifdef UVMHIST UVMHIST_DECL(pmapexechist); UVMHIST_DECL(pmaphist); -UVMHIST_DECL(pmapsegtabhist); +UVMHIST_DECL(pmapxtabhist); +#endif + +/* + * Alternate mapping hooks for pool pages. Avoids thrashing the TLB. 
+ */ +struct vm_page *pmap_md_alloc_poolpage(int); + +#if !defined(KASAN) +vaddr_t pmap_map_poolpage(paddr_t); +paddr_t pmap_unmap_poolpage(vaddr_t); +#define PMAP_ALLOC_POOLPAGE(flags) pmap_md_alloc_poolpage(flags) +#define PMAP_MAP_POOLPAGE(pa) pmap_map_poolpage(pa) +#define PMAP_UNMAP_POOLPAGE(va) pmap_unmap_poolpage(va) + +#if defined(_LP64) +#define PMAP_DIRECT +static __inline int +pmap_direct_process(paddr_t pa, voff_t pgoff, size_t len, + int (*process)(void *, size_t, void *), void *arg) +{ + vaddr_t va = pmap_md_direct_map_paddr(pa); + + return process((void *)(va + pgoff), len, arg); +} #endif +#endif + +#define PMAP_MAP_PDETABPAGE(pa) pmap_md_map_poolpage(pa, PAGE_SIZE) +#define PMAP_MAP_SEGTABPAGE(pa) pmap_md_map_poolpage(pa, PAGE_SIZE) +#define PMAP_MAP_PTEPAGE(pa) pmap_md_map_poolpage(pa, PAGE_SIZE) /* * The user address space is mapped using a two level structure where @@ -93,12 +125,33 @@ UVMHIST_DECL(pmapsegtabhist); #define pmap_round_seg(x) (((vaddr_t)(x) + SEGOFSET) & ~SEGOFSET) /* - * Each seg_tab point an array of pt_entry [NPTEPG] + * Each ptpage maps a "segment" worth of address space. That is + * NPTEPG * PAGE_SIZE. */ + +typedef struct { + pt_entry_t ppg_ptes[NPTEPG]; +} pmap_ptpage_t; + +#if defined(PMAP_HWPAGEWALKER) +typedef union pmap_pdetab { + pd_entry_t pde_pde[PMAP_PDETABSIZE]; + union pmap_pdetab * pde_next; +} pmap_pdetab_t; +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) typedef union pmap_segtab { +#ifdef _LP64 union pmap_segtab * seg_seg[PMAP_SEGTABSIZE]; - pt_entry_t * seg_tab[PMAP_SEGTABSIZE]; +#endif + pmap_ptpage_t * seg_ppg[PMAP_SEGTABSIZE]; +#ifdef PMAP_HWPAGEWALKER + pd_entry_t seg_pde[PMAP_PDETABSIZE]; +#endif + union pmap_segtab * seg_next; } pmap_segtab_t; +#endif + #ifdef _KERNEL struct pmap; @@ -110,6 +163,7 @@ typedef bool (*pte_callback_t)(struct pm * virtual memory. */ void pmap_bootstrap_common(void); + pt_entry_t *pmap_pte_lookup(struct pmap *, vaddr_t); pt_entry_t *pmap_pte_reserve(struct pmap *, vaddr_t, int); void pmap_pte_process(struct pmap *, vaddr_t, vaddr_t, pte_callback_t, @@ -118,6 +172,10 @@ void pmap_segtab_activate(struct pmap *, void pmap_segtab_deactivate(struct pmap *); void pmap_segtab_init(struct pmap *); void pmap_segtab_destroy(struct pmap *, pte_callback_t, uintptr_t); +#ifdef PMAP_HWPAGEWALKER +pd_entry_t *pmap_pde_lookup(struct pmap *, vaddr_t, paddr_t *); +bool pmap_pdetab_fixup(struct pmap *, vaddr_t); +#endif extern kmutex_t pmap_segtab_lock; #endif /* _KERNEL */ @@ -130,13 +188,32 @@ extern kmutex_t pmap_segtab_lock; * Machine dependent pmap structure. */ struct pmap { + struct uvm_object pm_uobject; +#define pm_count pm_uobject.uo_refs /* pmap reference count */ +#define pm_pvp_list pm_uobject.memq + + krwlock_t pm_obj_lock; /* lock for pm_uobject */ +#define pm_lock pm_uobject.vmobjlock + + struct pglist pm_ppg_list; +#if defined(PMAP_HWPAGEWALKER) + struct pglist pm_pdetab_list; +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + struct pglist pm_segtab_list; +#endif #ifdef MULTIPROCESSOR kcpuset_t *pm_active; /* pmap was active on ... */ kcpuset_t *pm_onproc; /* pmap is active on ... 
*/ volatile u_int pm_shootdown_pending; #endif - pmap_segtab_t * pm_segtab; /* pointers to pages of PTEs */ - u_int pm_count; /* pmap reference count */ +#if defined(PMAP_HWPAGEWALKER) + pmap_pdetab_t * pm_pdetab; /* pointer to HW PDEs */ +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + pmap_segtab_t * pm_segtab; /* pointers to pages of PTEs; or */ + /* virtual shadow of HW PDEs */ +#endif u_int pm_flags; #define PMAP_DEFERRED_ACTIVATE __BIT(0) struct pmap_statistics pm_stats; /* pmap statistics */ @@ -148,6 +225,20 @@ struct pmap { struct pmap_asid_info pm_pai[1]; }; +static inline void +pmap_lock(struct pmap *pm) +{ + + rw_enter(pm->pm_lock, RW_WRITER); +} + +static inline void +pmap_unlock(struct pmap *pm) +{ + + rw_exit(pm->pm_lock); +} + #ifdef _KERNEL struct pmap_kernel { struct pmap kernel_pmap; @@ -184,13 +275,17 @@ extern struct pmap_limits pmap_limits; extern u_int pmap_page_colormask; -extern pmap_segtab_t pmap_kern_segtab; - /* * The current top of kernel VM */ extern vaddr_t pmap_curmaxkvaddr; +#if defined(PMAP_HWPAGEWALKER) +extern pmap_pdetab_t pmap_kern_pdetab; +#else +extern pmap_segtab_t pmap_kern_segtab; +#endif + #define pmap_wired_count(pmap) ((pmap)->pm_stats.wired_count) #define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) @@ -211,27 +306,37 @@ void pmap_pv_protect(paddr_t, vm_prot_t) #define PMAP_WBINV 1 #define PMAP_INV 2 -//uint16_t pmap_pvlist_lock(struct vm_page_md *, bool); kmutex_t *pmap_pvlist_lock_addr(struct vm_page_md *); #define PMAP_STEAL_MEMORY /* enable pmap_steal_memory() */ #define PMAP_GROWKERNEL /* enable pmap_growkernel() */ -/* - * Alternate mapping hooks for pool pages. Avoids thrashing the TLB. - */ -vaddr_t pmap_map_poolpage(paddr_t); -paddr_t pmap_unmap_poolpage(vaddr_t); -struct vm_page *pmap_md_alloc_poolpage(int); -#define PMAP_ALLOC_POOLPAGE(flags) pmap_md_alloc_poolpage(flags) -#define PMAP_MAP_POOLPAGE(pa) pmap_map_poolpage(pa) -#define PMAP_UNMAP_POOLPAGE(va) pmap_unmap_poolpage(va) - #define PMAP_COUNT(name) (pmap_evcnt_##name.ev_count++ + 0) #define PMAP_COUNTER(name, desc) \ struct evcnt pmap_evcnt_##name = \ EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap", desc); \ EVCNT_ATTACH_STATIC(pmap_evcnt_##name) + +static inline pt_entry_t * +kvtopte(vaddr_t va) +{ + + return pmap_pte_lookup(pmap_kernel(), va); +} + +/* for ddb */ +void pmap_db_pmap_print(struct pmap *, void (*)(const char *, ...) __printflike(1, 2)); +void pmap_db_mdpg_print(struct vm_page *, void (*)(const char *, ...) __printflike(1, 2)); + +#if defined(EFI_RUNTIME) +struct pmap * + pmap_efirt(void); + +#define pmap_activate_efirt() pmap_md_activate_efirt() +#define pmap_deactivate_efirt() pmap_md_deactivate_efirt() + +#endif + #endif /* _KERNEL */ #endif /* _UVM_PMAP_PMAP_H_ */ Index: src/sys/uvm/pmap/pmap_segtab.c diff -u src/sys/uvm/pmap/pmap_segtab.c:1.28 src/sys/uvm/pmap/pmap_segtab.c:1.29 --- src/sys/uvm/pmap/pmap_segtab.c:1.28 Sun Sep 25 06:21:58 2022 +++ src/sys/uvm/pmap/pmap_segtab.c Wed Oct 26 07:35:20 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap_segtab.c,v 1.28 2022/09/25 06:21:58 skrll Exp $ */ +/* $NetBSD: pmap_segtab.c,v 1.29 2022/10/26 07:35:20 skrll Exp $ */ /*- * Copyright (c) 1998, 2001 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap_segtab.c,v 1.28 2022/09/25 06:21:58 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap_segtab.c,v 1.29 2022/10/26 07:35:20 skrll Exp $"); /* * Manages physical address maps. @@ -107,35 +107,94 @@ __KERNEL_RCSID(0, "$NetBSD: pmap_segtab. 
#include <sys/systm.h> #include <uvm/uvm.h> +#include <uvm/pmap/pmap.h> -CTASSERT(NBPG >= sizeof(pmap_segtab_t)); +#if defined(XSEGSHIFT) && XSEGSHIFT == SEGSHIFT +#undef XSEGSHIFT +#undef XSEGLENGTH +#undef NBXSEG +#undef NXSEGPG +#endif -struct pmap_segtab_info { +#define MULT_CTASSERT(a,b) __CTASSERT((a) < (b) || ((a) % (b) == 0)) + +__CTASSERT(sizeof(pmap_ptpage_t) == NBPG); + +#if defined(PMAP_HWPAGEWALKER) +#ifdef _LP64 +MULT_CTASSERT(PMAP_PDETABSIZE, NPDEPG); +MULT_CTASSERT(NPDEPG, PMAP_PDETABSIZE); +#endif /* _LP64 */ +MULT_CTASSERT(sizeof(pmap_pdetab_t *), sizeof(pd_entry_t)); +MULT_CTASSERT(sizeof(pd_entry_t), sizeof(pmap_pdetab_t)); + +#if 0 +#ifdef _LP64 +static const bool separate_pdetab_root_p = NPDEPG != PMAP_PDETABSIZE; +#else +static const bool separate_pdetab_root_p = true; +#endif /* _LP64 */ +#endif + +typedef struct { + pmap_pdetab_t *free_pdetab0; /* free list kept locally */ + pmap_pdetab_t *free_pdetab; /* free list kept locally */ +#ifdef DEBUG + uint32_t nget; + uint32_t nput; + uint32_t npage; +#define PDETAB_ADD(n, v) (pmap_segtab_info.pdealloc.n += (v)) +#else +#define PDETAB_ADD(n, v) ((void) 0) +#endif /* DEBUG */ +} pmap_pdetab_alloc_t; +#endif /* PMAP_HWPAGEWALKER */ + +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) +#ifdef _LP64 +__CTASSERT(NSEGPG >= PMAP_SEGTABSIZE); +__CTASSERT(NSEGPG % PMAP_SEGTABSIZE == 0); +#endif +__CTASSERT(NBPG >= sizeof(pmap_segtab_t)); + +typedef struct { + pmap_segtab_t *free_segtab0; /* free list kept locally */ pmap_segtab_t *free_segtab; /* free list kept locally */ #ifdef DEBUG - uint32_t nget_segtab; - uint32_t nput_segtab; - uint32_t npage_segtab; -#define SEGTAB_ADD(n, v) (pmap_segtab_info.n ## _segtab += (v)) + uint32_t nget; + uint32_t nput; + uint32_t npage; +#define SEGTAB_ADD(n, v) (pmap_segtab_info.segalloc.n += (v)) #else #define SEGTAB_ADD(n, v) ((void) 0) #endif -#ifdef PMAP_PTP_CACHE +} pmap_segtab_alloc_t; +#endif /* !PMAP_HWPAGEWALKER || !PMAP_MAP_PDETABPAGE */ + +struct pmap_segtab_info { +#if defined(PMAP_HWPAGEWALKER) + pmap_pdetab_alloc_t pdealloc; +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + pmap_segtab_alloc_t segalloc; +#endif +#ifdef PMAP_PPG_CACHE struct pgflist ptp_pgflist; /* Keep a list of idle page tables. */ #endif } pmap_segtab_info = { -#ifdef PMAP_PTP_CACHE +#ifdef PMAP_PPG_CACHE .ptp_pgflist = LIST_HEAD_INITIALIZER(pmap_segtab_info.ptp_pgflist), #endif }; kmutex_t pmap_segtab_lock __cacheline_aligned; +#ifndef PMAP_HWPAGEWALKER /* - * Check that a seg_tab[] array is empty. + * Check that a seg_ppg[] array is empty. * * This is used when allocating or freeing a pmap_segtab_t. 
The stb - * should be unused -- meaning, none of the seg_tab[] pointers are + * should be unused -- meaning, none of the seg_ppg[] pointers are * not NULL, as it transitions from either freshly allocated segtab from * pmap pool, an unused allocated page segtab alloc from the SMP case, * where two CPUs attempt to allocate the same underlying segtab, the @@ -147,62 +206,31 @@ pmap_check_stb(pmap_segtab_t *stb, const { #ifdef DEBUG for (size_t i = 0; i < PMAP_SEGTABSIZE; i++) { - if (stb->seg_tab[i] != NULL) { + if (stb->seg_ppg[i] != NULL) { #define DEBUG_NOISY #ifdef DEBUG_NOISY UVMHIST_FUNC(__func__); - UVMHIST_CALLARGS(pmapsegtabhist, "stb=%#jx", + UVMHIST_CALLARGS(pmapxtabhist, "stb=%#jx", (uintptr_t)stb, 0, 0, 0); for (size_t j = i; j < PMAP_SEGTABSIZE; j++) - if (stb->seg_tab[j] != NULL) - printf("%s: stb->seg_tab[%zu] = %p\n", - caller, j, stb->seg_tab[j]); + if (stb->seg_ppg[j] != NULL) + printf("%s: stb->seg_ppg[%zu] = %p\n", + caller, j, stb->seg_ppg[j]); #endif - panic("%s: pm_segtab.seg_tab[%zu] != 0 (%p): %s", - caller, i, stb->seg_tab[i], why); + panic("%s: pm_segtab.seg_ppg[%zu] != 0 (%p): %s", + caller, i, stb->seg_ppg[i], why); } } #endif } - -/* - * Check that an array of ptes is actually zero. - */ -static void -pmap_check_ptes(pt_entry_t *pte, const char *caller) -{ - /* - * All pte arrays should be page aligned. - */ - if (((uintptr_t)pte & PAGE_MASK) != 0) { - panic("%s: pte entry at %p not page aligned", caller, pte); - } - -#ifdef DEBUG - for (size_t i = 0; i < NPTEPG; i++) - if (pte[i] != 0) { -#ifdef DEBUG_NOISY - UVMHIST_FUNC(__func__); - UVMHIST_CALLARGS(pmapsegtabhist, "pte=%#jx", - (uintptr_t)pte, 0, 0, 0); - for (size_t j = i + 1; j < NPTEPG; j++) - if (pte[j] != 0) - UVMHIST_LOG(pmapsegtabhist, - "pte[%zu] = %#"PRIxPTE, - j, pte_value(pte[j]), 0, 0); -#endif - panic("%s: pte[%zu] entry at %p not 0 (%#"PRIxPTE")", - caller, i, &pte[i], pte_value(pte[i])); - } -#endif -} +#endif /* PMAP_HWPAGEWALKER */ static inline struct vm_page * pmap_pte_pagealloc(void) { struct vm_page *pg; - pg = PMAP_ALLOC_POOLPAGE(UVM_PGA_ZERO|UVM_PGA_USERESERVE); + pg = pmap_md_alloc_poolpage(UVM_PGA_ZERO | UVM_PGA_USERESERVE); if (pg) { #ifdef UVM_PAGE_TRKOWN pg->owner_tag = NULL; @@ -213,113 +241,385 @@ pmap_pte_pagealloc(void) return pg; } -static inline pt_entry_t * -pmap_segmap(struct pmap *pmap, vaddr_t va) +#if defined(PMAP_HWPAGEWALKER) && defined(PMAP_MAP_PDETABPAGE) +static vaddr_t +pmap_pde_to_va(pd_entry_t pde) +{ + if (!pte_pde_valid_p(pde)) + return 0; + + paddr_t pa = pte_pde_to_paddr(pde); + return pmap_md_direct_map_paddr(pa); +} + +#ifdef _LP64 +static pmap_pdetab_t * +pmap_pde_to_pdetab(pd_entry_t pde) { + + return (pmap_pdetab_t *)pmap_pde_to_va(pde); +} +#endif + +static pmap_ptpage_t * +pmap_pde_to_ptpage(pd_entry_t pde) +{ + + return (pmap_ptpage_t *)pmap_pde_to_va(pde); +} +#endif + +#ifdef _LP64 +__CTASSERT((XSEGSHIFT - SEGSHIFT) % (PGSHIFT-3) == 0); +#endif + +static inline pmap_ptpage_t * +pmap_ptpage(struct pmap *pmap, vaddr_t va) +{ +#if defined(PMAP_HWPAGEWALKER) && defined(PMAP_MAP_PDETABPAGE) + vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1; + pmap_pdetab_t *ptb = pmap->pm_pdetab; + +// UVMHIST_LOG(pmaphist, "pm_pdetab %#jx", ptb, 0, 0, 0); + + KASSERT(pmap != pmap_kernel() || !pmap_md_direct_mapped_vaddr_p(va)); + +#ifdef _LP64 + for (size_t segshift = XSEGSHIFT; + segshift > SEGSHIFT; + segshift -= PGSHIFT - 3, pdetab_mask = NSEGPG - 1) { + ptb = pmap_pde_to_pdetab(ptb->pde_pde[(va >> segshift) & pdetab_mask]); + if (ptb == NULL) + return NULL; + } +#endif + 
return pmap_pde_to_ptpage(ptb->pde_pde[(va >> SEGSHIFT) & pdetab_mask]); +#else + vaddr_t segtab_mask = PMAP_SEGTABSIZE - 1; pmap_segtab_t *stb = pmap->pm_segtab; + KASSERTMSG(pmap != pmap_kernel() || !pmap_md_direct_mapped_vaddr_p(va), "pmap %p va %#" PRIxVADDR, pmap, va); #ifdef _LP64 - stb = stb->seg_seg[(va >> XSEGSHIFT) & (NSEGPG - 1)]; - if (stb == NULL) - return NULL; + for (size_t segshift = XSEGSHIFT; + segshift > SEGSHIFT; + segshift -= PGSHIFT - 3, segtab_mask = NSEGPG - 1) { + stb = stb->seg_seg[(va >> segshift) & segtab_mask]; + if (stb == NULL) + return NULL; + } +#endif + return stb->seg_ppg[(va >> SEGSHIFT) & segtab_mask]; +#endif +} + +#if defined(PMAP_HWPAGEWALKER) +bool +pmap_pdetab_fixup(struct pmap *pmap, vaddr_t va) +{ + struct pmap * const kpm = pmap_kernel(); + pmap_pdetab_t * const kptb = kpm->pm_pdetab; + pmap_pdetab_t * const uptb = pmap->pm_pdetab; + size_t idx = PMAP_PDETABSIZE - 1; +#if !defined(PMAP_MAP_PDETABPAGE) + __CTASSERT(PMAP_PDETABSIZE == PMAP_SEGTABSIZE); + pmap_segtab_t * const kstb = &pmap_kern_segtab; + pmap_segtab_t * const ustb = pmap->pm_segtab; +#endif + + // Regardless of how many levels deep this page table is, we only + // need to verify the first level PDEs match up. +#ifdef XSEGSHIFT + idx &= va >> XSEGSHIFT; +#else + idx &= va >> SEGSHIFT; +#endif + if (uptb->pde_pde[idx] != kptb->pde_pde[idx]) { + pte_pde_set(&uptb->pde_pde[idx], kptb->pde_pde[idx]); +#if !defined(PMAP_MAP_PDETABPAGE) + ustb->seg_seg[idx] = kstb->seg_seg[idx]; // copy KVA of PTP +#endif + return true; + } + return false; +} +#endif /* PMAP_HWPAGEWALKER */ + + +static void +pmap_page_attach(pmap_t pmap, vaddr_t kva, struct vm_page *pg, + struct pglist *pglist, voff_t off) +{ + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmapxtabhist, "pm %#jx kva %#jx pg %#jx list %#jx", + (uintptr_t)pmap, (uintptr_t)kva, (uintptr_t)pg, (uintptr_t)pglist); + + struct uvm_object * const uobj = &pmap->pm_uobject; + if (pg == NULL) { + paddr_t pa; + + bool ok __diagused = pmap_extract(pmap_kernel(), kva, &pa); + KASSERT(ok); + + pg = PHYS_TO_VM_PAGE(pa); + KASSERT(pg != NULL); + } + + UVMHIST_LOG(pmapxtabhist, "kva %#jx uobj %#jx pg %#jx list %#jx", + (uintptr_t)kva, (uintptr_t)uobj, (uintptr_t)pg, (uintptr_t)pglist); + + pmap_lock(pmap); + TAILQ_INSERT_TAIL(pglist, pg, pageq.queue); + uobj->uo_npages++; + pmap_unlock(pmap); + + /* + * Now set each vm_page that maps this page to point to the + * pmap and set the offset to what we want. 
+ */ + KASSERTMSG(pg->uobject == NULL, "pg %p pg->uobject %p", pg, pg->uobject); + pg->uobject = uobj; + pg->offset = off; +} + +static struct vm_page * +pmap_page_detach(pmap_t pmap, struct pglist *list, vaddr_t va) +{ + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmapxtabhist, "pm %#jx kva %#jx list %#jx", + (uintptr_t)pmap, (uintptr_t)va, (uintptr_t)list, 0); + + paddr_t pa; + bool ok __diagused = pmap_extract(pmap_kernel(), va, &pa); + KASSERT(ok); + + struct vm_page * const pg = PHYS_TO_VM_PAGE(pa); + struct uvm_object * const uobj = &pmap->pm_uobject; + + UVMHIST_LOG(pmapxtabhist, "kva %#jx uobj %#jx pg %#jx list %#jx", + (uintptr_t)va, (uintptr_t)uobj, (uintptr_t)pg, (uintptr_t)list); + + KASSERTMSG(pg->uobject == uobj, "pg->uobject %p vs uobj %p", + pg->uobject, uobj); + + pmap_lock(pmap); + TAILQ_REMOVE(list, pg, pageq.queue); + uobj->uo_npages--; + pmap_unlock(pmap); + + pg->uobject = NULL; + pg->offset = 0; + + return pg; +} + +#ifndef PMAP_PPG_CACHE +static void +pmap_segtab_pagefree(pmap_t pmap, struct pglist *list, vaddr_t kva, size_t size) +{ +#ifdef PMAP_MAP_PTEPAGE + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmapxtabhist, "pm %#jx list %#jx kva %#jx size %#jx", + (uintptr_t)pmap, (uintptr_t)list, kva, size); + KASSERT(size == PAGE_SIZE); + if (size == PAGE_SIZE) { + UVMHIST_LOG(pmapxtabhist, "about to detach (kva %#jx)", + kva, 0, 0, 0); + uvm_pagefree(pmap_page_detach(pmap, list, kva)); + return; + } #endif + for (size_t i = 0; i < size; i += PAGE_SIZE) { + (void)pmap_page_detach(pmap, list, kva + i); + } - return stb->seg_tab[(va >> SEGSHIFT) & (PMAP_SEGTABSIZE - 1)]; + uvm_km_free(kernel_map, kva, size, UVM_KMF_WIRED); } +#endif pt_entry_t * pmap_pte_lookup(pmap_t pmap, vaddr_t va) { - pt_entry_t *pte = pmap_segmap(pmap, va); - if (pte == NULL) + pmap_ptpage_t * const ppg = pmap_ptpage(pmap, va); + if (ppg == NULL) return NULL; - return pte + ((va >> PGSHIFT) & (NPTEPG - 1)); + const size_t pte_idx = (va >> PGSHIFT) & (NPTEPG - 1); + + return ppg->ppg_ptes + pte_idx; } -/* - * Insert the segtab into the segtab freelist. - */ -static void -pmap_segtab_free(pmap_segtab_t *stb) + +static pmap_ptpage_t * +pmap_ptpage_alloc(pmap_t pmap, int flags, paddr_t *pa_p) { UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmapxtabhist, "pm %#jx flags %#jx pa_p %#jx", (uintptr_t)pmap, + (uintptr_t)flags, (uintptr_t)pa_p, 0); - UVMHIST_CALLARGS(pmapsegtabhist, "stb=%#jx", (uintptr_t)stb, 0, 0, 0); + pmap_ptpage_t *ppg = NULL; - mutex_spin_enter(&pmap_segtab_lock); - stb->seg_seg[0] = pmap_segtab_info.free_segtab; - pmap_segtab_info.free_segtab = stb; - SEGTAB_ADD(nput, 1); - mutex_spin_exit(&pmap_segtab_lock); +#ifdef PMAP_MAP_PTEPAGE + struct vm_page *pg = NULL; + paddr_t pa; +#ifdef PMAP_PPG_CACHE + ppg = pmap_pgcache_alloc(&pmap_segtab_info.ppg_flist); +#endif + if (ppg == NULL) { + pg = pmap_pte_pagealloc(); + if (pg == NULL) { + if (flags & PMAP_CANFAIL) + return NULL; + panic("%s: cannot allocate page table page ", + __func__); + } + pa = VM_PAGE_TO_PHYS(pg); + ppg = (pmap_ptpage_t *)PMAP_MAP_PTEPAGE(pa); + } else { + bool ok __diagused = pmap_extract(pmap_kernel(), (vaddr_t)ppg, &pa); + KASSERT(ok); + } + + UVMHIST_LOG(pmapxtabhist, "about to attach", 0, 0, 0, 0); + pmap_page_attach(pmap, (vaddr_t)ppg, pg, &pmap->pm_ppg_list, 0); + + *pa_p = pa; +#else + vaddr_t kva = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE, + UVM_KMF_WIRED | UVM_KMF_WAITVA + | (flags & PMAP_CANFAIL ? 
UVM_KMF_CANFAIL : 0)); + if (kva == 0) { + if (flags & PMAP_CANFAIL) + return NULL; + panic("%s: cannot allocate page table page", __func__); + } + UVMHIST_LOG(pmapxtabhist, "about to attach", 0, 0, 0, 0); + pmap_page_attach(pmap, kva, NULL, &pmap->pm_ppg_list, 0); + ppg = (pmap_ptpage_t *)kva; +#endif + + UVMHIST_LOG(pmapxtabhist, "... ppg %#jx", (uintptr_t)ppg, 0, 0, 0); + + return ppg; } static void -pmap_segtab_release(pmap_t pmap, pmap_segtab_t **stb_p, bool free_stb, - pte_callback_t callback, uintptr_t flags, - vaddr_t va, vsize_t vinc) +pmap_ptpage_free(pmap_t pmap, pmap_ptpage_t *ppg, const char *caller) { - pmap_segtab_t *stb = *stb_p; - UVMHIST_FUNC(__func__); - UVMHIST_CALLARGS(pmapsegtabhist, "pm=%#jx stb_p=%#jx free=%jd", - (uintptr_t)pmap, (uintptr_t)stb_p, free_stb, 0); - UVMHIST_LOG(pmapsegtabhist, " callback=%#jx flags=%#jx va=%#jx vinc=%#jx", - (uintptr_t)callback, flags, (uintptr_t)va, (uintptr_t)vinc); - for (size_t i = (va / vinc) & (PMAP_SEGTABSIZE - 1); - i < PMAP_SEGTABSIZE; - i++, va += vinc) { -#ifdef _LP64 - if (vinc > NBSEG) { - if (stb->seg_seg[i] != NULL) { - UVMHIST_LOG(pmapsegtabhist, - " recursing %jd", i, 0, 0, 0); - pmap_segtab_release(pmap, &stb->seg_seg[i], - true, callback, flags, va, vinc / NSEGPG); - KASSERT(stb->seg_seg[i] == NULL); - } - continue; + UVMHIST_CALLARGS(pmapxtabhist, "pm %#jx va %#jx", (uintptr_t)pmap, + (uintptr_t)ppg, 0, 0); + + const vaddr_t kva = (vaddr_t)ppg; + /* + * All pte arrays should be page aligned. + */ + if ((kva & PAGE_MASK) != 0) { + panic("%s: pte entry at %p not page aligned", caller, ppg); + } + +#ifdef DEBUG + for (size_t j = 0; j < NPTEPG; j++) { + if (ppg->ppg_ptes[j] != 0) { + UVMHIST_LOG(pmapxtabhist, + "pte entry %#jx not 0 (%#jx)", + (uintptr_t)&ppg->ppg_ptes[j], + (uintptr_t)ppg->ppg_ptes[j], 0, 0); + for (size_t i = j + 1; i < NPTEPG; i++) + if (ppg->ppg_ptes[i] != 0) + UVMHIST_LOG(pmapxtabhist, + "pte[%zu] = %#"PRIxPTE, + i, ppg->ppg_ptes[i], 0, 0); + + panic("%s: pte entry at %p not 0 (%#" PRIxPTE ")", + __func__, &ppg->ppg_ptes[j], + ppg->ppg_ptes[j]); } + } #endif - KASSERT(vinc == NBSEG); + //pmap_md_vca_clean(pg, (vaddr_t)ppg, NBPG); +#ifdef PMAP_PPG_CACHE + UVMHIST_LOG(pmapxtabhist, "about to detach", 0, 0, 0, 0); + pmap_page_detach(pmap, &pmap->pm_ppg_list, kva); + pmap_segtab_pagecache(&pmap_segtab_info.ppg_flist, ppg); +#else + pmap_segtab_pagefree(pmap, &pmap->pm_ppg_list, kva, PAGE_SIZE); +#endif /* PMAP_PPG_CACHE */ +} - /* get pointer to segment map */ - pt_entry_t *pte = stb->seg_tab[i]; - if (pte == NULL) - continue; - pmap_check_ptes(pte, __func__); - /* - * If our caller wants a callback, do so. 
- */ - if (callback != NULL) { - (*callback)(pmap, va, va + vinc, pte, flags); - } +#if defined(PMAP_HWPAGEWALKER) && defined(PMAP_MAP_PDETABPAGE) - // PMAP_UNMAP_POOLPAGE should handle any VCA issues itself - paddr_t pa = PMAP_UNMAP_POOLPAGE((vaddr_t)pte); - struct vm_page *pg = PHYS_TO_VM_PAGE(pa); -#ifdef PMAP_PTP_CACHE - mutex_spin_enter(&pmap_segtab_lock); - LIST_INSERT_HEAD(&pmap_segtab_info.ptp_pgflist, pg, pageq.list); - mutex_spin_exit(&pmap_segtab_lock); -#else - uvm_pagefree(pg); +static pmap_pdetab_t * +pmap_pdetab_alloc(struct pmap *pmap) +{ + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmapxtabhist, "pm %#jx", (uintptr_t)pmap, 0, 0, 0); + + pmap_pdetab_t *ptb; +#ifdef KERNHIST + bool found_on_freelist = false; #endif - stb->seg_tab[i] = NULL; - UVMHIST_LOG(pmapsegtabhist, " zeroing tab[%jd]", i, 0, 0, 0); + again: + mutex_spin_enter(&pmap_segtab_lock); + UVMHIST_LOG(pmapxtabhist, "free_pdetab %#jx", + (uintptr_t)pmap_segtab_info.pdealloc.free_pdetab, 0, 0, 0); + if (__predict_true((ptb = pmap_segtab_info.pdealloc.free_pdetab) != NULL)) { + pmap_segtab_info.pdealloc.free_pdetab = ptb->pde_next; + + UVMHIST_LOG(pmapxtabhist, "freelist ptb=%#jx", + (uintptr_t)ptb, 0, 0, 0); + + PDETAB_ADD(nget, 1); + ptb->pde_next = NULL; +#ifdef KERNHIST + found_on_freelist = true; +#endif } + mutex_spin_exit(&pmap_segtab_lock); - if (free_stb) { - pmap_check_stb(stb, __func__, - vinc == NBSEG ? "release seg" : "release xseg"); - pmap_segtab_free(stb); - *stb_p = NULL; + struct vm_page *ptb_pg = NULL; + if (__predict_false(ptb == NULL)) { + ptb_pg = pmap_pte_pagealloc(); + + UVMHIST_LOG(pmapxtabhist, "ptb_pg=%#jx", + (uintptr_t)ptb_pg, 0, 0, 0); + if (__predict_false(ptb_pg == NULL)) { + /* + * XXX What else can we do? Could we deadlock here? + */ + uvm_wait("pdetab"); + goto again; + } + + UVMHIST_LOG(pmapxtabhist, "ptb_pg=%#jx 2", + (uintptr_t)ptb_pg, 0, 0, 0); + PDETAB_ADD(npage, 1); + const paddr_t ptb_pa = VM_PAGE_TO_PHYS(ptb_pg); + UVMHIST_LOG(pmapxtabhist, "ptb_pa=%#jx", (uintptr_t)ptb_pa, 0, 0, 0); + ptb = (pmap_pdetab_t *)PMAP_MAP_PDETABPAGE(ptb_pa); + UVMHIST_LOG(pmapxtabhist, "new ptb=%#jx", (uintptr_t)ptb, 0, + 0, 0); + + if (pte_invalid_pde() != 0) { + for (size_t i = 0; i < NPDEPG; i++) { + ptb->pde_pde[i] = pte_invalid_pde(); + } + } } + + UVMHIST_LOG(pmapxtabhist, "about to attach", 0, 0, 0, 0); + pmap_page_attach(pmap, (vaddr_t)ptb, ptb_pg, &pmap->pm_pdetab_list, 0); + + UVMHIST_LOG(pmapxtabhist, "... ptb %#jx found on freelist %d", + (uintptr_t)ptb, found_on_freelist, 0, 0); + + return ptb; } + +#else /* * Create and return a physical map. * @@ -333,26 +633,29 @@ pmap_segtab_release(pmap_t pmap, pmap_se * is bounded by that size. 
*/ static pmap_segtab_t * -pmap_segtab_alloc(void) +pmap_segtab_alloc(struct pmap *pmap) { + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmapxtabhist, "pm %#jx", (uintptr_t)pmap, 0, 0, 0); + pmap_segtab_t *stb; bool found_on_freelist = false; - UVMHIST_FUNC(__func__); again: mutex_spin_enter(&pmap_segtab_lock); - if (__predict_true((stb = pmap_segtab_info.free_segtab) != NULL)) { - pmap_segtab_info.free_segtab = stb->seg_seg[0]; - stb->seg_seg[0] = NULL; + if (__predict_true((stb = pmap_segtab_info.segalloc.free_segtab) != NULL)) { + pmap_segtab_info.segalloc.free_segtab = stb->seg_next; SEGTAB_ADD(nget, 1); + stb->seg_next = NULL; found_on_freelist = true; - UVMHIST_CALLARGS(pmapsegtabhist, "freelist stb=%#jx", + UVMHIST_LOG(pmapxtabhist, "freelist stb=%#jx", (uintptr_t)stb, 0, 0, 0); } mutex_spin_exit(&pmap_segtab_lock); + struct vm_page *stb_pg = NULL; if (__predict_false(stb == NULL)) { - struct vm_page * const stb_pg = pmap_pte_pagealloc(); + stb_pg = pmap_pte_pagealloc(); if (__predict_false(stb_pg == NULL)) { /* @@ -364,33 +667,208 @@ pmap_segtab_alloc(void) SEGTAB_ADD(npage, 1); const paddr_t stb_pa = VM_PAGE_TO_PHYS(stb_pg); - stb = (pmap_segtab_t *)PMAP_MAP_POOLPAGE(stb_pa); - UVMHIST_CALLARGS(pmapsegtabhist, "new stb=%#jx", - (uintptr_t)stb, 0, 0, 0); + stb = (pmap_segtab_t *)PMAP_MAP_SEGTABPAGE(stb_pa); + UVMHIST_LOG(pmapxtabhist, "new stb=%#jx", (uintptr_t)stb, 0, + 0, 0); +#if 0 +CTASSERT(NBPG / sizeof(*stb) == 1); const size_t n = NBPG / sizeof(*stb); if (n > 1) { /* * link all the segtabs in this page together */ for (size_t i = 1; i < n - 1; i++) { - stb[i].seg_seg[0] = &stb[i+1]; + stb[i].seg_next = &stb[i + 1]; } /* * Now link the new segtabs into the free segtab list. */ mutex_spin_enter(&pmap_segtab_lock); - stb[n-1].seg_seg[0] = pmap_segtab_info.free_segtab; - pmap_segtab_info.free_segtab = stb + 1; + stb[n - 1].seg_next = pmap_segtab_info.segalloc.free_segtab; + pmap_segtab_info.segalloc.free_segtab = stb + 1; SEGTAB_ADD(nput, n - 1); mutex_spin_exit(&pmap_segtab_lock); } +#endif } + UVMHIST_LOG(pmapxtabhist, "about to attach", 0, 0, 0, 0); + pmap_page_attach(pmap, (vaddr_t)stb, stb_pg, &pmap->pm_segtab_list, 0); + pmap_check_stb(stb, __func__, - found_on_freelist ? "from free list" : "allocated"); + found_on_freelist ? "from free list" : "allocated"); + + UVMHIST_LOG(pmapxtabhist, "... stb %#jx found on freelist %zu", + (uintptr_t)stb, found_on_freelist, 0, 0); return stb; } +#endif + +#if defined(PMAP_HWPAGEWALKER) +static void +pmap_pdetab_free(pmap_pdetab_t *ptb) +{ + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmaphist, "ptb %#jx", (uintptr_t)ptb, 0, 0, 0); + /* + * Insert the pdetab into the pdetab freelist. + */ + mutex_spin_enter(&pmap_segtab_lock); + ptb->pde_next = pmap_segtab_info.pdealloc.free_pdetab; + pmap_segtab_info.pdealloc.free_pdetab = ptb; + PDETAB_ADD(nput, 1); + mutex_spin_exit(&pmap_segtab_lock); + +} +#endif + + +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) +/* + * Insert the segtab into the segtab freelist. + */ +static void +pmap_segtab_free(pmap_segtab_t *stb) +{ + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmaphist, "stb %#jx", (uintptr_t)stb, 0, 0, 0); + + /* + * Insert the segtab into the segtab freelist. 
+ */ + mutex_spin_enter(&pmap_segtab_lock); + stb->seg_next = pmap_segtab_info.segalloc.free_segtab; + pmap_segtab_info.segalloc.free_segtab = stb; + SEGTAB_ADD(nput, 1); + mutex_spin_exit(&pmap_segtab_lock); +} +#endif + +#if defined(PMAP_HWPAGEWALKER) +static void +pmap_pdetab_release(pmap_t pmap, pmap_pdetab_t **ptb_p, bool free_ptb, + vaddr_t va, vsize_t vinc) +{ + const vaddr_t pdetab_mask = PMAP_PDETABSIZE - 1; + pmap_pdetab_t *ptb = *ptb_p; + + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmapxtabhist, "pm %#jx ptb_p %#jx ptb %#jx free %jd", + (uintptr_t)pmap, (uintptr_t)ptb_p, (uintptr_t)ptb, free_ptb); + UVMHIST_LOG(pmapxtabhist, " va=%#jx vinc=%#jx", + (uintptr_t)va, (uintptr_t)vinc, 0, 0); + + for (size_t i = (va / vinc) & pdetab_mask; + i < PMAP_PDETABSIZE; + i++, va += vinc) { +#ifdef _LP64 + if (vinc > NBSEG) { + if (pte_pde_valid_p(ptb->pde_pde[i])) { + pmap_pdetab_t *nptb = + pmap_pde_to_pdetab(ptb->pde_pde[i]); + UVMHIST_LOG(pmapxtabhist, + " va %#jx ptp->pde_pde[%jd] (*%#jx) = %#jx " + "recursing", va, i, &ptb->pde_pde[i], + ptb->pde_pde[i]); + pmap_pdetab_release(pmap, &nptb, true, + va, vinc / NPDEPG); + ptb->pde_pde[i] = pte_invalid_pde(); + KASSERT(nptb == NULL); + } + continue; + } +#endif + KASSERT(vinc == NBSEG); + + /* get pointer to PT page */ + pmap_ptpage_t *ppg = pmap_pde_to_ptpage(ptb->pde_pde[i]); + UVMHIST_LOG(pmapxtabhist, + " va %#jx ptb->pde_pde[%jd] (*%#jx) = %#jx", va, i, + (uintptr_t)&ptb->pde_pde[i], ptb->pde_pde[i]); + if (ppg == NULL) + continue; + + UVMHIST_LOG(pmapxtabhist, " zeroing tab (%#jx)[%jd] (%#jx)", + (uintptr_t)ptb->pde_pde, i, (uintptr_t)&ptb->pde_pde[i], 0); + + ptb->pde_pde[i] = pte_invalid_pde(); + + pmap_ptpage_free(pmap, ppg, __func__); + } + + if (free_ptb) { + UVMHIST_LOG(pmapxtabhist, " ptbp %#jx ptb %#jx", + (uintptr_t)ptb_p, (uintptr_t)ptb, 0, 0); + const vaddr_t kva = (vaddr_t)ptb; + UVMHIST_LOG(pmapxtabhist, "about to detach", 0, 0, 0, 0); + pmap_page_detach(pmap, &pmap->pm_pdetab_list, kva); + pmap_pdetab_free(ptb); + *ptb_p = NULL; + } +} +#endif + +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) +static void +pmap_segtab_release(pmap_t pmap, pmap_segtab_t **stb_p, bool free_stb, + pte_callback_t callback, uintptr_t flags, vaddr_t va, vsize_t vinc) +{ + pmap_segtab_t *stb = *stb_p; + + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmapxtabhist, "pm=%#jx stb_p=%#jx free=%jd", + (uintptr_t)pmap, (uintptr_t)stb, free_stb, 0); + UVMHIST_LOG(pmapxtabhist, " callback=%#jx flags=%#jx va=%#jx vinc=%#jx", + (uintptr_t)callback, flags, (uintptr_t)va, (uintptr_t)vinc); + + for (size_t i = (va / vinc) & (PMAP_SEGTABSIZE - 1); + i < PMAP_SEGTABSIZE; + i++, va += vinc) { +#ifdef _LP64 + if (vinc > NBSEG) { + if (stb->seg_seg[i] != NULL) { + UVMHIST_LOG(pmapxtabhist, + " recursing %jd", i, 0, 0, 0); + pmap_segtab_release(pmap, &stb->seg_seg[i], + true, callback, flags, va, vinc / NSEGPG); + KASSERT(stb->seg_seg[i] == NULL); + } + continue; + } +#endif + KASSERT(vinc == NBSEG); + + /* get pointer to segment map */ + pmap_ptpage_t *ppg = stb->seg_ppg[i]; + if (ppg == NULL) + continue; + + /* + * If our caller wants a callback, do so. + */ + if (callback != NULL) { + (*callback)(pmap, va, va + vinc, ppg->ppg_ptes, flags); + } + pmap_ptpage_free(pmap, ppg, __func__); + stb->seg_ppg[i] = NULL; + UVMHIST_LOG(pmapxtabhist, " zeroing tab[%jd]", i, 0, 0, 0); + } + + if (free_stb) { + pmap_check_stb(stb, __func__, + vinc == NBSEG ? 
"release seg" : "release xseg"); + + const vaddr_t kva = (vaddr_t)stb; + UVMHIST_LOG(pmapxtabhist, "about to detach", 0, 0, 0, 0); + pmap_page_detach(pmap, &pmap->pm_segtab_list, kva); + pmap_segtab_free(stb); + *stb_p = NULL; + } +} +#endif + + /* * Allocate the top segment table for the pmap. @@ -398,8 +876,22 @@ pmap_segtab_alloc(void) void pmap_segtab_init(pmap_t pmap) { + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmaphist, "pm %#jx", (uintptr_t)pmap, 0, 0, 0); - pmap->pm_segtab = pmap_segtab_alloc(); +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + /* + * Constantly converting from extracted PA to VA is somewhat expensive + * for systems with hardware page walkers and without an inexpensive + * way to access arbitrary virtual addresses, so we allocate an extra + * root segtab so that it can contain non-virtual addresses. + */ + pmap->pm_segtab = pmap_segtab_alloc(pmap); +#endif +#if defined(PMAP_HWPAGEWALKER) + pmap->pm_pdetab = pmap_pdetab_alloc(pmap); + pmap_md_pdetab_init(pmap); +#endif } /* @@ -410,16 +902,36 @@ pmap_segtab_init(pmap_t pmap) void pmap_segtab_destroy(pmap_t pmap, pte_callback_t func, uintptr_t flags) { - if (pmap->pm_segtab == NULL) - return; - + KASSERT(pmap != pmap_kernel()); #ifdef _LP64 const vsize_t vinc = NBXSEG; #else const vsize_t vinc = NBSEG; #endif - pmap_segtab_release(pmap, &pmap->pm_segtab, - func == NULL, func, flags, pmap->pm_minaddr, vinc); + +#if defined(PMAP_HWPAGEWALKER) + if (pmap->pm_pdetab != NULL) { + pmap_pdetab_release(pmap, &pmap->pm_pdetab, + true, pmap->pm_minaddr, vinc); + } +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + if (pmap->pm_segtab != NULL) { + pmap_segtab_release(pmap, &pmap->pm_segtab, + func == NULL, func, flags, pmap->pm_minaddr, vinc); + } +#endif + +#if defined(PMAP_HWPAGEWALKER) +#if !defined(PMAP_MAP_PDETABPAGE) + KASSERT((pmap->pm_segtab == NULL) == (pmap->pm_pdetab == NULL)); +#endif + KASSERT(pmap->pm_pdetab == NULL); +#endif +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + KASSERT(pmap->pm_segtab == NULL); +#endif + } /* @@ -429,9 +941,10 @@ void pmap_segtab_activate(struct pmap *pm, struct lwp *l) { if (l == curlwp) { - struct cpu_info * const ci = l->l_cpu; - pmap_md_xtab_activate(pm, l); KASSERT(pm == l->l_proc->p_vmspace->vm_map.pmap); + pmap_md_xtab_activate(pm, l); +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) + struct cpu_info * const ci = l->l_cpu; if (pm == pmap_kernel()) { ci->ci_pmap_user_segtab = PMAP_INVALID_SEGTAB_ADDRESS; #ifdef _LP64 @@ -443,21 +956,21 @@ pmap_segtab_activate(struct pmap *pm, st ci->ci_pmap_user_seg0tab = pm->pm_segtab->seg_seg[0]; #endif } +#endif } } - void pmap_segtab_deactivate(pmap_t pm) { - pmap_md_xtab_deactivate(pm); +#if !defined(PMAP_HWPAGEWALKER) || !defined(PMAP_MAP_PDETABPAGE) curcpu()->ci_pmap_user_segtab = PMAP_INVALID_SEGTAB_ADDRESS; #ifdef _LP64 curcpu()->ci_pmap_user_seg0tab = NULL; #endif - +#endif } /* @@ -498,89 +1011,184 @@ pmap_pte_process(pmap_t pmap, vaddr_t sv } } -/* - * Return a pointer for the pte that corresponds to the specified virtual - * address (va) in the target physical map, allocating if needed. 
- */ -pt_entry_t * -pmap_pte_reserve(pmap_t pmap, vaddr_t va, int flags) +#if defined(PMAP_HWPAGEWALKER) && defined(PMAP_MAP_PDETABPAGE) +static pd_entry_t * +pmap_pdetab_reserve(struct pmap *pmap, vaddr_t va) +#elif defined(PMAP_HWPAGEWALKER) +static pmap_ptpage_t ** +pmap_segtab_reserve(struct pmap *pmap, vaddr_t va, pd_entry_t **pde_p) +#else +static pmap_ptpage_t ** +pmap_segtab_reserve(struct pmap *pmap, vaddr_t va) +#endif { - pmap_segtab_t *stb = pmap->pm_segtab; - pt_entry_t *pte; UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmaphist, "pm %#jx va %#jx", (uintptr_t)pmap, + (uintptr_t)va, 0, 0); + +#if defined(PMAP_HWPAGEWALKER) + pmap_pdetab_t *ptb = pmap->pm_pdetab; + UVMHIST_LOG(pmaphist, "pm_pdetab %#jx", (uintptr_t)ptb, 0, 0, 0); +#endif +#if defined(PMAP_HWPAGEWALKER) && defined(PMAP_MAP_PDETABPAGE) + vaddr_t segtab_mask = PMAP_PDETABSIZE - 1; +#ifdef _LP64 + for (size_t segshift = XSEGSHIFT; + segshift > SEGSHIFT; + segshift -= PGSHIFT - 3, segtab_mask = NSEGPG - 1) { + pd_entry_t * const pde_p = + &ptb->pde_pde[(va >> segshift) & segtab_mask]; + pd_entry_t opde = *pde_p; + + UVMHIST_LOG(pmaphist, + "ptb %#jx segshift %jd pde_p %#jx opde %#jx", + ptb, segshift, pde_p, opde); + + if (__predict_false(!pte_pde_valid_p(opde))) { + ptb = pmap_pdetab_alloc(pmap); + pd_entry_t npde = pte_pde_pdetab( + pmap_md_direct_mapped_vaddr_to_paddr((vaddr_t)ptb), + pmap == pmap_kernel()); + opde = pte_pde_cas(pde_p, opde, npde); + if (__predict_false(pte_pde_valid_p(opde))) { + const vaddr_t kva = (vaddr_t)ptb; + UVMHIST_LOG(pmapxtabhist, "about to detach", + 0, 0, 0, 0); + pmap_page_detach(pmap, &pmap->pm_pdetab_list, + kva); + pmap_pdetab_free(ptb); + } else { + opde = npde; + } + } + ptb = pmap_pde_to_pdetab(opde); + UVMHIST_LOG(pmaphist, "opde %#jx ptb %#jx", opde, ptb, 0, 0); + } +#elif defined(XSEGSHIFT) + size_t segshift = XSEGSHIFT; + + pd_entry_t opde = ptb->pde_pde[(va >> segshift) & segtab_mask]; + KASSERT(pte_pde_valid_p(opde)); + ptb = pmap_pde_to_pdetab(opde); + segtab_mask = NSEGPG - 1; +#endif /* _LP64 */ + const size_t idx = (va >> SEGSHIFT) & segtab_mask; - pte = pmap_pte_lookup(pmap, va); - if (__predict_false(pte == NULL)) { + UVMHIST_LOG(pmaphist, "... 
returning %#jx (idx %jd)", (uintptr_t)&ptb->pde_pde[idx], idx, 0, 0); + + return &ptb->pde_pde[idx]; +#else /* PMAP_HWPAGEWALKER && PMAP_MAP_PDETABPAGE */ + pmap_segtab_t *stb = pmap->pm_segtab; + vaddr_t segtab_mask = PMAP_SEGTABSIZE - 1; #ifdef _LP64 - pmap_segtab_t ** const stb_p = - &stb->seg_seg[(va >> XSEGSHIFT) & (NSEGPG - 1)]; + for (size_t segshift = XSEGSHIFT; + segshift > SEGSHIFT; + segshift -= PGSHIFT - 3, segtab_mask = NSEGPG - 1) { + size_t idx = (va >> segshift) & segtab_mask; + pmap_segtab_t ** const stb_p = &stb->seg_seg[idx]; +#if defined(PMAP_HWPAGEWALKER) + pmap_pdetab_t ** const ptb_p = &ptb->pde_pde[idx]; +#endif /* PMAP_HWPAGEWALKER */ if (__predict_false((stb = *stb_p) == NULL)) { - pmap_segtab_t *nstb = pmap_segtab_alloc(); + stb = pmap_segtab_alloc(pmap); #ifdef MULTIPROCESSOR - pmap_segtab_t *ostb = atomic_cas_ptr(stb_p, NULL, nstb); + pmap_segtab_t *ostb = atomic_cas_ptr(stb_p, NULL, stb); if (__predict_false(ostb != NULL)) { - pmap_check_stb(nstb, __func__, "reserve"); - pmap_segtab_free(nstb); - nstb = ostb; + const vaddr_t kva = (vaddr_t)stb; + UVMHIST_LOG(pmapxtabhist, "about to detach", + 0, 0, 0, 0); + pmap_page_detach(pmap, &pmap->pm_segtab_list, + kva); + pmap_segtab_free(stb); + stb = ostb; } #else - *stb_p = nstb; + *stb_p = stb; #endif /* MULTIPROCESSOR */ - stb = nstb; } - KASSERT(stb == pmap->pm_segtab->seg_seg[(va >> XSEGSHIFT) & (NSEGPG - 1)]); + } +#elif defined(PMAP_HWPAGEWALKER) + pmap_segtab_t opde = ptb->pde_pde[(va >> segshift) & segtab_mask]; + KASSERT(pte_pde_valid_p(opde)); + ptb = pmap_pde_to_pdetab(opde); + segtab_mask = NSEGPG - 1; + #endif /* _LP64 */ - struct vm_page *pg = NULL; -#ifdef PMAP_PTP_CACHE - mutex_spin_enter(&pmap_segtab_lock); - if ((pg = LIST_FIRST(&pmap_segtab_info.ptp_pgflist)) != NULL) { - LIST_REMOVE(pg, pageq.list); - KASSERT(LIST_FIRST(&pmap_segtab_info.ptp_pgflist) != pg); - } - mutex_spin_exit(&pmap_segtab_lock); + size_t idx = (va >> SEGSHIFT) & segtab_mask; +#if defined(PMAP_HWPAGEWALKER) +#if defined(XSEGSHIFT) && (XSEGSHIFT != SEGSHIFT) + *pte_p = &pmap->pm_segtab +#else /* XSEGSHIFT */ + *pde_p = &ptb->pde_pde[idx]; +#endif /* XSEGSHIFT */ +#endif /* PMAP_HWPAGEWALKER */ + return &stb->seg_ppg[idx]; #endif - if (pg == NULL) - pg = pmap_pte_pagealloc(); - if (pg == NULL) { - if (flags & PMAP_CANFAIL) - return NULL; - panic("%s: cannot allocate page table page " - "for va %" PRIxVADDR, __func__, va); - } +} + - const paddr_t pa = VM_PAGE_TO_PHYS(pg); - pte = (pt_entry_t *)PMAP_MAP_POOLPAGE(pa); - pt_entry_t ** const pte_p = - &stb->seg_tab[(va >> SEGSHIFT) & (PMAP_SEGTABSIZE - 1)]; +/* + * Return a pointer for the pte that corresponds to the specified virtual + * address (va) in the target physical map, allocating if needed. 
+ */ +pt_entry_t * +pmap_pte_reserve(pmap_t pmap, vaddr_t va, int flags) +{ + UVMHIST_FUNC(__func__); + UVMHIST_CALLARGS(pmaphist, "pm=%#jx va=%#jx flags=%#jx", + (uintptr_t)pmap, (uintptr_t)va, flags, 0); + pmap_ptpage_t *ppg; + paddr_t pa = 0; + +#if defined(PMAP_HWPAGEWALKER) && defined(PMAP_MAP_PDETABPAGE) + pd_entry_t * const pde_p = pmap_pdetab_reserve(pmap, va); + ppg = pmap_pde_to_ptpage(*pde_p); +#elif defined(PMAP_HWPAGEWALKER) + pd_entry_t *pde_p; + pmap_ptpage_t ** const ppg_p = pmap_segtab_reserve(pmap, va, &pde_p); + ppg = *ppg_p; +#else + pmap_ptpage_t ** const ppg_p = pmap_segtab_reserve(pmap, va); + ppg = *ppg_p; +#endif + + if (__predict_false(ppg == NULL)) { + ppg = pmap_ptpage_alloc(pmap, flags, &pa); + if (__predict_false(ppg == NULL)) + return NULL; + +#if defined(PMAP_HWPAGEWALKER) + pd_entry_t npde = pte_pde_ptpage(pa, pmap == pmap_kernel()); +#endif +#if defined(PMAP_HWPAGEWALKER) && defined(PMAP_MAP_PDETABPAGE) + pd_entry_t opde = *pde_p; + opde = pte_pde_cas(pde_p, opde, npde); + if (__predict_false(pte_pde_valid_p(opde))) { + pmap_ptpage_free(pmap, ppg, __func__); + ppg = pmap_pde_to_ptpage(opde); + } +#else #ifdef MULTIPROCESSOR - pt_entry_t *opte = atomic_cas_ptr(pte_p, NULL, pte); + pmap_ptpage_t *oppg = atomic_cas_ptr(ppg_p, NULL, ppg); /* * If another thread allocated the segtab needed for this va * free the page we just allocated. */ - if (__predict_false(opte != NULL)) { -#ifdef PMAP_PTP_CACHE - mutex_spin_enter(&pmap_segtab_lock); - LIST_INSERT_HEAD(&pmap_segtab_info.ptp_pgflist, - pg, pageq.list); - mutex_spin_exit(&pmap_segtab_lock); -#else - PMAP_UNMAP_POOLPAGE((vaddr_t)pte); - uvm_pagefree(pg); + if (__predict_false(oppg != NULL)) { + pmap_ptpage_free(pmap, ppg, __func__); + ppg = oppg; +#if defined(PMAP_HWPAGEWALKER) + } else { + pte_pde_set(pde_p, npde); #endif - pte = opte; } -#else - *pte_p = pte; -#endif - KASSERT(pte == stb->seg_tab[(va >> SEGSHIFT) & (PMAP_SEGTABSIZE - 1)]); - UVMHIST_CALLARGS(pmapsegtabhist, "pm=%#jx va=%#jx -> tab[%jd]=%#jx", - (uintptr_t)pmap, (uintptr_t)va, - (va >> SEGSHIFT) & (PMAP_SEGTABSIZE - 1), (uintptr_t)pte); - - pmap_check_ptes(pte, __func__); - pte += (va >> PGSHIFT) & (NPTEPG - 1); +#else /* !MULTIPROCESSOR */ + *ppg_p = ppg; +#endif /* MULTIPROCESSOR */ +#endif /* PMAP_HWPAGEWALKER && PMAP_MAP_PDETABPAGE */ } - return pte; + const size_t pte_idx = (va >> PGSHIFT) & (NPTEPG - 1); + + return ppg->ppg_ptes + pte_idx; } Index: src/sys/uvm/pmap/pmap_tlb.c diff -u src/sys/uvm/pmap/pmap_tlb.c:1.53 src/sys/uvm/pmap/pmap_tlb.c:1.54 --- src/sys/uvm/pmap/pmap_tlb.c:1.53 Thu Oct 20 06:24:51 2022 +++ src/sys/uvm/pmap/pmap_tlb.c Wed Oct 26 07:35:20 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap_tlb.c,v 1.53 2022/10/20 06:24:51 skrll Exp $ */ +/* $NetBSD: pmap_tlb.c,v 1.54 2022/10/26 07:35:20 skrll Exp $ */ /*- * Copyright (c) 2010 The NetBSD Foundation, Inc. @@ -31,7 +31,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.53 2022/10/20 06:24:51 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap_tlb.c,v 1.54 2022/10/26 07:35:20 skrll Exp $"); /* * Manages address spaces in a TLB. 
@@ -549,20 +549,26 @@ pmap_tlb_shootdown_process(void) struct cpu_info * const ci = curcpu(); struct pmap_tlb_info * const ti = cpu_tlb_info(ci); + UVMHIST_FUNC(__func__); + UVMHIST_CALLED(maphist); + KASSERT(cpu_intr_p()); KASSERTMSG(ci->ci_cpl >= IPL_SCHED, "%s: cpl (%d) < IPL_SCHED (%d)", __func__, ci->ci_cpl, IPL_SCHED); TLBINFO_LOCK(ti); + UVMHIST_LOG(maphist, "ti %#jx", ti, 0, 0, 0); switch (ti->ti_tlbinvop) { case TLBINV_ONE: { /* * We only need to invalidate one user ASID. */ + UVMHIST_LOG(maphist, "TLBINV_ONE ti->ti_victim %#jx", ti->ti_victim, 0, 0, 0); struct pmap_asid_info * const pai = PMAP_PAI(ti->ti_victim, ti); KASSERT(ti->ti_victim != pmap_kernel()); if (pmap_tlb_intersecting_onproc_p(ti->ti_victim, ti)) { + UVMHIST_LOG(maphist, "pmap_tlb_intersecting_onproc_p", 0, 0, 0, 0); /* * The victim is an active pmap so we will just * invalidate its TLB entries. @@ -572,6 +578,7 @@ pmap_tlb_shootdown_process(void) tlb_invalidate_asids(pai->pai_asid, pai->pai_asid); pmap_tlb_asid_check(); } else if (pai->pai_asid) { + UVMHIST_LOG(maphist, "asid %jd", pai->pai_asid, 0, 0, 0); /* * The victim is no longer an active pmap for this TLB. * So simply clear its ASID and when pmap_activate is @@ -664,6 +671,7 @@ pmap_tlb_shootdown_bystanders(pmap_t pm) KASSERT(i < pmap_ntlbs); struct pmap_tlb_info * const ti = pmap_tlbs[i]; KASSERT(tlbinfo_index(ti) == i); + UVMHIST_LOG(maphist, "ti %#jx", ti, 0, 0, 0); /* * Skip this TLB if there are no active mappings for it. */ @@ -697,6 +705,8 @@ pmap_tlb_shootdown_bystanders(pmap_t pm) ti->ti_victim = NULL; } } + UVMHIST_LOG(maphist, "tlbinvop %jx victim %#jx", ti->ti_tlbinvop, + (uintptr_t)ti->ti_victim, 0, 0); TLBINFO_UNLOCK(ti); /* * Now we can send out the shootdown IPIs to a CPU @@ -712,6 +722,7 @@ pmap_tlb_shootdown_bystanders(pmap_t pm) continue; } if (!pmap_tlb_intersecting_active_p(pm, ti)) { + UVMHIST_LOG(maphist, "pm %#jx not active", (uintptr_t)pm, 0, 0, 0); /* * If this pmap has an ASID assigned but it's not * currently running, nuke its ASID. Next time the @@ -1066,14 +1077,20 @@ pmap_tlb_asid_release_all(struct pmap *p void pmap_tlb_asid_check(void) { + UVMHIST_FUNC(__func__); + UVMHIST_CALLED(pmaphist); + #ifdef DEBUG kpreempt_disable(); const tlb_asid_t asid __debugused = tlb_get_asid(); + UVMHIST_LOG(pmaphist, " asid %u vs pmap_cur_asid %u", asid, + curcpu()->ci_pmap_asid_cur, 0, 0); KDASSERTMSG(asid == curcpu()->ci_pmap_asid_cur, "%s: asid (%#x) != current asid (%#x)", __func__, asid, curcpu()->ci_pmap_asid_cur); kpreempt_enable(); #endif + UVMHIST_LOG(pmaphist, " <-- done", 0, 0, 0, 0); } #ifdef DEBUG @@ -1088,3 +1105,18 @@ pmap_tlb_check(pmap_t pm, bool (*func)(v TLBINFO_UNLOCK(ti); } #endif /* DEBUG */ + +#ifdef DDB +void +pmap_db_tlb_print(struct pmap *pm, + void (*pr)(const char *, ...) __printflike(1, 2)) +{ +#if PMAP_TLB_MAX == 1 + pr(" asid %5u\n", pm->pm_pai[0].pai_asid); +#else + for (size_t i = 0; i < (PMAP_TLB_MAX > 1 ? 
pmap_ntlbs : 1); i++) { + pr(" tlb %zu asid %5u\n", i, pm->pm_pai[i].pai_asid); + } +#endif +} +#endif /* DDB */ Index: src/sys/uvm/pmap/pmap_tlb.h diff -u src/sys/uvm/pmap/pmap_tlb.h:1.15 src/sys/uvm/pmap/pmap_tlb.h:1.16 --- src/sys/uvm/pmap/pmap_tlb.h:1.15 Wed Aug 19 06:11:49 2020 +++ src/sys/uvm/pmap/pmap_tlb.h Wed Oct 26 07:35:20 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap_tlb.h,v 1.15 2020/08/19 06:11:49 skrll Exp $ */ +/* $NetBSD: pmap_tlb.h,v 1.16 2022/10/26 07:35:20 skrll Exp $ */ /* * Copyright (c) 1992, 1993 @@ -185,5 +185,8 @@ void pmap_tlb_invalidate_addr(pmap_t, va void pmap_tlb_check(pmap_t, bool (*)(void *, vaddr_t, tlb_asid_t, pt_entry_t)); void pmap_tlb_asid_check(void); +/* for ddb */ +void pmap_db_tlb_print(struct pmap *, void (*)(const char *, ...) __printflike(1, 2)); + #endif /* _KERNEL */ #endif /* _UVM_PMAP_PMAP_TLB_H_ */
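One more illustrative aside on the hardware-walker path: pmap_pdetab_fixup()
lets a user root table lazily pick up the kernel's first-level PDE for a
faulting kernel VA by comparing against the kernel root and copying on
mismatch.  The sketch below models only that compare-and-copy in user space;
the trailing-underscore names, PDETABSIZE_ and TOPSHIFT_ are illustrative
stand-ins, not kernel symbols.

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PDETABSIZE_ 512
	#define TOPSHIFT_   30          /* illustrative top-level shift */

	typedef uint64_t pd_entry_t_;

	typedef struct {
		pd_entry_t_ pde[PDETABSIZE_];
	} pdetab_t_;

	/* Copy the kernel's first-level PDE into the user root if it differs. */
	static bool
	pdetab_fixup_(pdetab_t_ *user, const pdetab_t_ *kern, uintptr_t va)
	{
		const size_t idx = (va >> TOPSHIFT_) & (PDETABSIZE_ - 1);
		if (user->pde[idx] != kern->pde[idx]) {
			user->pde[idx] = kern->pde[idx];
			return true;    /* caller can retry the faulting access */
		}
		return false;
	}

	int
	main(void)
	{
		static pdetab_t_ kern, user;
		kern.pde[3] = 0xdeadbeef;
		uintptr_t kva = (uintptr_t)3 << TOPSHIFT_;
		printf("fixed up: %d, user pde now %#lx\n",
		    pdetab_fixup_(&user, &kern, kva),
		    (unsigned long)user.pde[3]);
		return 0;
	}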