* i386/intel/pmap.c: switch to dynamic allocation of all the levels of the
  user-space page table tree, using a separate kmem cache for each level.
  This makes it possible to extend the usable user address space on x86_64
  beyond a single L3 page table. The kernel address map is left untouched
  for now, as it needs a different initialization.
* i386/intel/pmap.h: remove the hardcoded number of user page directory
  pointers and add a macro to reconstruct the linear (virtual) address from
  the page table indices.
---
 i386/intel/pmap.c | 544 ++++++++++++++++++++++------------------------
 i386/intel/pmap.h |  21 +-
 2 files changed, 277 insertions(+), 288 deletions(-)
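As a side note for reviewers, here is a minimal user-space sketch of what the new pagenum2lin() macro computes: it inverts the lin2*num() index extraction, so a linear address can be rebuilt from per-level table indices. L4SHIFT and the 0x1ff index masks match the pmap.h hunk below; the PDPSHIFT/PDESHIFT/PTESHIFT values (30/21/12) and the lin2*num()-style helpers are simplified local stand-ins, not the kernel definitions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define L4SHIFT   39      /* matches the pmap.h hunk */
#define PDPSHIFT  30      /* assumed */
#define PDESHIFT  21      /* assumed */
#define PTESHIFT  12      /* assumed */
#define IDXMASK   0x1ff   /* same value as L4MASK/PDPMASK in pmap.h */

/* Simplified stand-ins for the kernel's lin2*num() index macros. */
#define lin2l4num(a)  (((a) >> L4SHIFT)  & IDXMASK)
#define lin2pdpnum(a) (((a) >> PDPSHIFT) & IDXMASK)
#define lin2pdenum(a) (((a) >> PDESHIFT) & IDXMASK)
#define lin2ptenum(a) (((a) >> PTESHIFT) & IDXMASK)

/* Same shape as the x86_64 pagenum2lin() added to i386/intel/pmap.h. */
#define pagenum2lin(l4, l3, l2, l1)        \
	(((uint64_t)(l4) << L4SHIFT)  +    \
	 ((uint64_t)(l3) << PDPSHIFT) +    \
	 ((uint64_t)(l2) << PDESHIFT) +    \
	 ((uint64_t)(l1) << PTESHIFT))

int main(void)
{
	uint64_t va = 0x00007f1234561000ULL;	/* arbitrary page-aligned user address */
	uint64_t back = pagenum2lin(lin2l4num(va), lin2pdpnum(va),
				    lin2pdenum(va), lin2ptenum(va));

	assert(back == va);			/* the indices round-trip to the address */
	printf("%#llx -> l4=%llu l3=%llu l2=%llu\n",
	       (unsigned long long) va,
	       (unsigned long long) lin2l4num(va),
	       (unsigned long long) lin2pdpnum(va),
	       (unsigned long long) lin2pdenum(va));
	return 0;
}

In the patch, pmap_collect() uses exactly this reconstruction, pagenum2lin(l4i, l3i, l2i, 0), to recover the base linear address of the range mapped by a page-table page before calling pmap_remove_range().
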
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c index e867ed59..3a30271e 100644 --- a/i386/intel/pmap.c +++ b/i386/intel/pmap.c @@ -398,6 +398,7 @@ struct pmap kernel_pmap_store; pmap_t kernel_pmap; struct kmem_cache pmap_cache; /* cache of pmap structures */ +struct kmem_cache pt_cache; /* cache of page tables */ struct kmem_cache pd_cache; /* cache of page directories */ #if PAE struct kmem_cache pdpt_cache; /* cache of page directory pointer tables */ @@ -429,6 +430,14 @@ pt_entry_t *kernel_page_dir; */ static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS * NCPUS]; +#ifdef __x86_64__ +static inline pt_entry_t * +pmap_l4base(const pmap_t pmap, vm_offset_t lin_addr) +{ + return &pmap->l4base[lin2l4num(lin_addr)]; +} +#endif + #ifdef PAE static inline pt_entry_t * pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr) @@ -443,7 +452,7 @@ pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr) #else /* __x86_64__ */ pdp_table = pmap->pdpbase; #endif /* __x86_64__ */ - return pdp_table; + return &pdp_table[lin2pdpnum(lin_addr)]; } #endif @@ -456,7 +465,9 @@ pmap_pde(const pmap_t pmap, vm_offset_t addr) #if PAE pt_entry_t *pdp_table; pdp_table = pmap_ptp(pmap, addr); - pt_entry_t pde = pdp_table[lin2pdpnum(addr)]; + if (pdp_table == 0) + return(PT_ENTRY_NULL); + pt_entry_t pde = *pdp_table; if ((pde & INTEL_PTE_VALID) == 0) return PT_ENTRY_NULL; page_dir = (pt_entry_t *) ptetokv(pde); @@ -1092,15 +1103,18 @@ void pmap_init(void) */ s = (vm_size_t) sizeof(struct pmap); kmem_cache_init(&pmap_cache, "pmap", s, 0, NULL, 0); - kmem_cache_init(&pd_cache, "pd", + kmem_cache_init(&pt_cache, "pmap_L1", + INTEL_PGBYTES, INTEL_PGBYTES, NULL, + KMEM_CACHE_PHYSMEM); + kmem_cache_init(&pd_cache, "pmap_L2", INTEL_PGBYTES, INTEL_PGBYTES, NULL, KMEM_CACHE_PHYSMEM); #if PAE - kmem_cache_init(&pdpt_cache, "pdpt", + kmem_cache_init(&pdpt_cache, "pmap_L3", INTEL_PGBYTES, INTEL_PGBYTES, NULL, KMEM_CACHE_PHYSMEM); #ifdef __x86_64__ - kmem_cache_init(&l4_cache, "L4", + kmem_cache_init(&l4_cache, "pmap_L4", INTEL_PGBYTES, INTEL_PGBYTES, NULL, KMEM_CACHE_PHYSMEM); #endif /* __x86_64__ */ @@ -1244,6 +1258,11 @@ pmap_page_table_page_dealloc(vm_offset_t pa) vm_object_lock(pmap_object); m = vm_page_lookup(pmap_object, pa); vm_page_lock_queues(); +#ifdef MACH_PV_PAGETABLES + if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa))) + panic("couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t) kv_to_ma(pa)); + pmap_set_page_readwrite((void*) phystokv(pa)); +#endif /* MACH_PV_PAGETABLES */ vm_page_free(m); inuse_ptepages_count--; vm_page_unlock_queues(); @@ -1265,7 +1284,7 @@ pmap_page_table_page_dealloc(vm_offset_t pa) pmap_t pmap_create(vm_size_t size) { #ifdef __x86_64__ - // needs to be reworked if we want to dynamically allocate PDPs + // needs to be reworked if we want to dynamically allocate PDPs for kernel const int PDPNUM = PDPNUM_KERNEL; #endif pt_entry_t *page_dir[PDPNUM]; @@ -1360,30 +1379,6 @@ pmap_t pmap_create(vm_size_t size) memset(p->l4base, 0, INTEL_PGBYTES); WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)], pa_to_pte(kvtophys((vm_offset_t) pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE); -#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS) - // kernel vm and user vm are not in the same l4 entry, so add the user one - // TODO alloc only PDPTE for the user range VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS - // and keep the same for kernel range, in l4 table we have different entries - pt_entry_t *pdp_user = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache); - if (pdp_user == NULL) { - panic("pmap 
create"); - } - memset(pdp_user, 0, INTEL_PGBYTES); - WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)], - pa_to_pte(kvtophys((vm_offset_t) pdp_user)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_USER); -#endif /* lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS) */ - for (int i = 0; i < PDPNUM_USER; i++) { - pt_entry_t *user_page_dir = (pt_entry_t *) kmem_cache_alloc(&pd_cache); - memset(user_page_dir, 0, INTEL_PGBYTES); - WRITE_PTE(&pdp_user[i + lin2pdpnum(VM_MIN_USER_ADDRESS)], // pdp_user - pa_to_pte(kvtophys((vm_offset_t)user_page_dir)) - | INTEL_PTE_VALID -#if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES) - | INTEL_PTE_WRITE | INTEL_PTE_USER -#endif - ); - } - #ifdef MACH_PV_PAGETABLES // FIXME: use kmem_cache_alloc instead if (kmem_alloc_wired(kernel_map, @@ -1443,15 +1438,7 @@ pmap_t pmap_create(vm_size_t size) void pmap_destroy(pmap_t p) { -#if PAE - int i; -#endif - boolean_t free_all; - pt_entry_t *page_dir; - pt_entry_t *pdep; - phys_addr_t pa; int c, s; - vm_page_t m; if (p == PMAP_NULL) return; @@ -1466,87 +1453,54 @@ void pmap_destroy(pmap_t p) return; /* still in use */ } + /* + * Free the page table tree. + */ #if PAE - for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++) { #ifdef __x86_64__ -#ifdef USER32 - /* In this case we know we have one PDP for user space */ - pt_entry_t *pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]); -#else -#warning "TODO do 64-bit userspace need more that 512G?" - pt_entry_t *pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]); -#endif /* USER32 */ - page_dir = (pt_entry_t *) ptetokv(pdp[i]); + for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) { + pt_entry_t pdp = (pt_entry_t) p->l4base[l4i]; + if (!(pdp & INTEL_PTE_VALID)) + continue; + pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp); + for (int l3i = 0; l3i < 512; l3i++) { #else /* __x86_64__ */ - page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]); + pt_entry_t *pdpbase = p->pdpbase; + for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++) { #endif /* __x86_64__ */ - free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); + pt_entry_t pde = (pt_entry_t) pdpbase[l3i]; + if (!(pde & INTEL_PTE_VALID)) + continue; + pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde); + for (int l2i = 0; l2i < 512; l2i++) { #else /* PAE */ - free_all = FALSE; - page_dir = p->dirbase; + pt_entry_t *pdebase = p->dirbase; + for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++) { #endif /* PAE */ - -#ifdef __x86_64__ -#warning FIXME 64bit need to free l3 -#endif - /* - * Free the memory maps, then the - * pmap structure. 
- */ - for (pdep = page_dir; - (free_all - || pdep < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)]) - && pdep < &page_dir[NPTES]; - pdep += ptes_per_vm_page) { - if (*pdep & INTEL_PTE_VALID) { - pa = pte_to_pa(*pdep); - assert(pa == (vm_offset_t) pa); - vm_object_lock(pmap_object); - m = vm_page_lookup(pmap_object, pa); - if (m == VM_PAGE_NULL) - panic("pmap_destroy: pte page not in object"); - vm_page_lock_queues(); -#ifdef MACH_PV_PAGETABLES - if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa))) - panic("pmap_destroy: couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t) kv_to_ma(pa)); - pmap_set_page_readwrite((void*) phystokv(pa)); -#endif /* MACH_PV_PAGETABLES */ - vm_page_free(m); - inuse_ptepages_count--; - vm_page_unlock_queues(); - vm_object_unlock(pmap_object); - } - } -#ifdef MACH_PV_PAGETABLES - pmap_set_page_readwrite((void*) page_dir); -#endif /* MACH_PV_PAGETABLES */ - kmem_cache_free(&pd_cache, (vm_offset_t) page_dir); + pt_entry_t pte = (pt_entry_t) pdebase[l2i]; + if (!(pte & INTEL_PTE_VALID)) + continue; + kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte)); + } #if PAE - } - -#ifdef MACH_PV_PAGETABLES + kmem_cache_free(&pd_cache, (vm_offset_t)pdebase); + } #ifdef __x86_64__ - pmap_set_page_readwrite(p->l4base); - pmap_set_page_readwrite(p->user_l4base); - pmap_set_page_readwrite(p->user_pdpbase); + kmem_cache_free(&pdpt_cache, (vm_offset_t)pdpbase); + } #endif /* __x86_64__ */ - pmap_set_page_readwrite(p->pdpbase); -#endif /* MACH_PV_PAGETABLES */ +#endif /* PAE */ + /* Finally, free the page table tree root and the pmap itself */ +#if PAE #ifdef __x86_64__ - kmem_cache_free(&pdpt_cache, (vm_offset_t) pmap_ptp(p, VM_MIN_USER_ADDRESS)); -#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS) - // TODO kernel vm and user vm are not in the same l4 entry -#endif kmem_cache_free(&l4_cache, (vm_offset_t) p->l4base); -#ifdef MACH_PV_PAGETABLES - kmem_free(kernel_map, (vm_offset_t)p->user_l4base, INTEL_PGBYTES); - kmem_free(kernel_map, (vm_offset_t)p->user_pdpbase, INTEL_PGBYTES); -#endif /* MACH_PV_PAGETABLES */ #else /* __x86_64__ */ - kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase); + kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase); #endif /* __x86_64__ */ -#endif /* PAE */ +#else /* PAE */ + kmem_cache_free(&pd_cache, (vm_offset_t) p->dirbase); +#endif /* PAE */ kmem_cache_free(&pmap_cache, (vm_offset_t) p); } @@ -1756,7 +1710,7 @@ void pmap_remove( l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); if (l > e) l = e; - if (*pde & INTEL_PTE_VALID) { + if (pde && (*pde & INTEL_PTE_VALID)) { spte = (pt_entry_t *)ptetokv(*pde); spte = &spte[ptenum(s)]; epte = &spte[intel_btop(l-s)]; @@ -2036,86 +1990,24 @@ void pmap_protect( SPLX(spl); } +typedef pt_entry_t* (*pmap_level_getter_t)(const pmap_t pmap, vm_offset_t addr); /* - * Insert the given physical page (p) at - * the specified virtual address (v) in the - * target physical map with the protection requested. - * - * If specified, the page will be wired down, meaning - * that the related pte can not be reclaimed. - * - * NB: This is the only routine which MAY NOT lazy-evaluate - * or lose information. That is, this routine must actually - * insert this page into the given map NOW. 
- */ -void pmap_enter( - pmap_t pmap, - vm_offset_t v, - phys_addr_t pa, - vm_prot_t prot, - boolean_t wired) +* Expand one single level of the page table tree +*/ +static inline pt_entry_t* pmap_expand_level(pmap_t pmap, vm_offset_t v, int spl, + pmap_level_getter_t pmap_level, + pmap_level_getter_t pmap_level_upper, + int n_per_vm_page, + struct kmem_cache *cache) { - boolean_t is_physmem; pt_entry_t *pte; - pv_entry_t pv_h; - unsigned long i, pai; - pv_entry_t pv_e; - pt_entry_t template; - int spl; - phys_addr_t old_pa; - - assert(pa != vm_page_fictitious_addr); - if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa); - if (pmap == PMAP_NULL) - return; - -#if !MACH_KDB - if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end)) - panic("pmap_enter(%zx, %llx) falls in physical memory area!\n", v, (unsigned long long) pa); -#endif -#if !(__i486__ || __i586__ || __i686__) - if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0 - && !wired /* hack for io_wire */ ) { - /* - * Because the 386 ignores write protection in kernel mode, - * we cannot enter a read-only kernel mapping, and must - * remove an existing mapping if changing it. - */ - PMAP_READ_LOCK(pmap, spl); - - pte = pmap_pte(pmap, v); - if (pte != PT_ENTRY_NULL && *pte != 0) { - /* - * Invalidate the translation buffer, - * then remove the mapping. - */ - pmap_remove_range(pmap, v, pte, - pte + ptes_per_vm_page); - PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE); - } - PMAP_READ_UNLOCK(pmap, spl); - return; - } -#endif - - /* - * Must allocate a new pvlist entry while we're unlocked; - * Allocating may cause pageout (which will lock the pmap system). - * If we determine we need a pvlist entry, we will unlock - * and allocate one. Then we will retry, throughing away - * the allocated entry later (if we no longer need it). - */ - pv_e = PV_ENTRY_NULL; -Retry: - PMAP_READ_LOCK(pmap, spl); /* * Expand pmap to include this pte. Assume that * pmap is always expanded to include enough hardware * pages to map one VM page. */ - - while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) { + while ((pte = pmap_level(pmap, v)) == PT_ENTRY_NULL) { /* * Need to allocate a new page-table page. */ @@ -2136,7 +2028,9 @@ Retry: */ PMAP_READ_UNLOCK(pmap, spl); - ptp = phystokv(pmap_page_table_page_alloc()); + while (!(ptp = kmem_cache_alloc(cache))) + VM_PAGE_WAIT((void (*)()) 0); + memset((void *)ptp, 0, PAGE_SIZE); /* * Re-lock the pmap and check that another thread has @@ -2146,12 +2040,12 @@ Retry: */ PMAP_READ_LOCK(pmap, spl); - if (pmap_pte(pmap, v) != PT_ENTRY_NULL) { + if (pmap_level(pmap, v) != PT_ENTRY_NULL) { /* * Oops... */ PMAP_READ_UNLOCK(pmap, spl); - pmap_page_table_page_dealloc(kvtophys(ptp)); + kmem_cache_free(cache, ptp); PMAP_READ_LOCK(pmap, spl); continue; } @@ -2159,8 +2053,8 @@ Retry: /* * Enter the new page table page in the page directory. */ - i = ptes_per_vm_page; - pdp = pmap_pde(pmap, v); + i = n_per_vm_page; + pdp = pmap_level_upper(pmap, v); do { #ifdef MACH_PV_PAGETABLES pmap_set_page_readonly((void *) ptp); @@ -2185,6 +2079,100 @@ Retry: */ continue; } + return pte; +} + +/* + * Expand, if required, the PMAP to include the virtual address V. + * PMAP needs to be locked, and it will be still locked on return. It + * can temporarily unlock the PMAP, during allocation or deallocation + * of physical pages. 
+ */ +static inline pt_entry_t* pmap_expand(pmap_t pmap, vm_offset_t v, int spl) +{ +#ifdef PAE +#ifdef __x86_64__ + pmap_expand_level(pmap, v, spl, pmap_ptp, pmap_l4base, ptes_per_vm_page, &pdpt_cache); +#endif /* __x86_64__ */ + pmap_expand_level(pmap, v, spl, pmap_pde, pmap_ptp, ptes_per_vm_page, &pd_cache); +#endif /* PAE */ + return pmap_expand_level(pmap, v, spl, pmap_pte, pmap_pde, ptes_per_vm_page, &pt_cache); +} + +/* + * Insert the given physical page (p) at + * the specified virtual address (v) in the + * target physical map with the protection requested. + * + * If specified, the page will be wired down, meaning + * that the related pte can not be reclaimed. + * + * NB: This is the only routine which MAY NOT lazy-evaluate + * or lose information. That is, this routine must actually + * insert this page into the given map NOW. + */ +void pmap_enter( + pmap_t pmap, + vm_offset_t v, + phys_addr_t pa, + vm_prot_t prot, + boolean_t wired) +{ + boolean_t is_physmem; + pt_entry_t *pte; + pv_entry_t pv_h; + unsigned long i, pai; + pv_entry_t pv_e; + pt_entry_t template; + int spl; + phys_addr_t old_pa; + + assert(pa != vm_page_fictitious_addr); + if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa); + if (pmap == PMAP_NULL) + return; + +#if !MACH_KDB + if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end)) + panic("pmap_enter(%llx, %llx) falls in physical memory area!\n", v, (unsigned long long) pa); +#endif +#if !(__i486__ || __i586__ || __i686__) + if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0 + && !wired /* hack for io_wire */ ) { + /* + * Because the 386 ignores write protection in kernel mode, + * we cannot enter a read-only kernel mapping, and must + * remove an existing mapping if changing it. + */ + PMAP_READ_LOCK(pmap, spl); + + pte = pmap_pte(pmap, v); + if (pte != PT_ENTRY_NULL && *pte != 0) { + /* + * Invalidate the translation buffer, + * then remove the mapping. + */ + pmap_remove_range(pmap, v, pte, + pte + ptes_per_vm_page); + PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE); + } + PMAP_READ_UNLOCK(pmap, spl); + return; + } +#endif + + /* + * Must allocate a new pvlist entry while we're unlocked; + * Allocating may cause pageout (which will lock the pmap system). + * If we determine we need a pvlist entry, we will unlock + * and allocate one. Then we will retry, throughing away + * the allocated entry later (if we no longer need it). + */ + pv_e = PV_ENTRY_NULL; +Retry: + PMAP_READ_LOCK(pmap, spl); + + pte = pmap_expand(pmap, v, spl); if (vm_page_ready()) is_physmem = (vm_page_lookup_pa(pa) != NULL); @@ -2462,10 +2450,7 @@ void pmap_copy( */ void pmap_collect(pmap_t p) { - int i; - boolean_t free_all; - pt_entry_t *page_dir; - pt_entry_t *pdp, *ptp; + pt_entry_t *ptp; pt_entry_t *eptp; phys_addr_t pa; int spl, wired; @@ -2476,119 +2461,104 @@ void pmap_collect(pmap_t p) if (p == kernel_pmap) return; + /* + * Free the page table tree. + */ #if PAE - for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++) { #ifdef __x86_64__ -#ifdef USER32 - /* In this case we know we have one PDP for user space */ - pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]); -#else -#warning "TODO do 64-bit userspace need more that 512G?" 
- pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]); -#endif /* USER32 */ - page_dir = (pt_entry_t *) ptetokv(pdp[i]); + for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) { + pt_entry_t pdp = (pt_entry_t) p->l4base[l4i]; + if (!(pdp & INTEL_PTE_VALID)) + continue; + pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp); + for (int l3i = 0; l3i < 512; l3i++) { #else /* __x86_64__ */ - page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]); + pt_entry_t *pdpbase = p->pdpbase; + for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++) { #endif /* __x86_64__ */ - free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS); -#else - i = 0; - free_all = FALSE; - page_dir = p->dirbase; -#endif - - /* - * Garbage collect map. - */ - PMAP_READ_LOCK(p, spl); - for (pdp = page_dir; - (free_all - || pdp < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)]) - && pdp < &page_dir[NPTES]; - pdp += ptes_per_vm_page) { - if (*pdp & INTEL_PTE_VALID) { - - pa = pte_to_pa(*pdp); - ptp = (pt_entry_t *)phystokv(pa); - eptp = ptp + NPTES*ptes_per_vm_page; - - /* - * If the pte page has any wired mappings, we cannot - * free it. - */ - wired = 0; - { - pt_entry_t *ptep; - for (ptep = ptp; ptep < eptp; ptep++) { - if (*ptep & INTEL_PTE_WIRED) { - wired = 1; - break; - } - } - } - if (!wired) { - /* - * Remove the virtual addresses mapped by this pte page. - */ - { /*XXX big hack*/ - vm_offset_t va = pdenum2lin(pdp - page_dir - + i * NPTES); - if (p == kernel_pmap) - va = lintokv(va); - pmap_remove_range(p, - va, - ptp, - eptp); - } - - /* - * Invalidate the page directory pointer. - */ - { - int i = ptes_per_vm_page; - pt_entry_t *pdep = pdp; - do { + pt_entry_t pde = (pt_entry_t ) pdpbase[l3i]; + if (!(pde & INTEL_PTE_VALID)) + continue; + pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde); + for (int l2i = 0; l2i < 512; l2i++) { +#else /* PAE */ + pt_entry_t *pdebase = p->dirbase; + for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++) { +#endif /* PAE */ + pt_entry_t pte = (pt_entry_t) pdebase[l2i]; + if (!(pte & INTEL_PTE_VALID)) + continue; + + pa = pte_to_pa(pte); + ptp = (pt_entry_t *)phystokv(pa); + eptp = ptp + NPTES*ptes_per_vm_page; + + /* + * If the pte page has any wired mappings, we cannot + * free it. + */ + wired = 0; + { + pt_entry_t *ptep; + for (ptep = ptp; ptep < eptp; ptep++) { + if (*ptep & INTEL_PTE_WIRED) { + wired = 1; + break; + } + } + } + if (!wired) { + /* + * Remove the virtual addresses mapped by this pte page. + */ + { /*XXX big hack*/ + vm_offset_t va = pagenum2lin(l4i, l3i, l2i, 0); + if (p == kernel_pmap) + va = lintokv(va); + pmap_remove_range(p, va, ptp, eptp); + } + + /* + * Invalidate the page directory pointer. 
+ */ + { + int i = ptes_per_vm_page; + pt_entry_t *pdep = &pdebase[l2i]; + do { #ifdef MACH_PV_PAGETABLES - unsigned long pte = *pdep; - void *ptable = (void*) ptetokv(pte); - if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0))) - panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1); - if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable))) - panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable))); - pmap_set_page_readwrite(ptable); + unsigned long pte = *pdep; + void *ptable = (void*) ptetokv(pte); + if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0))) + panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1); + if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable))) + panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable))); + pmap_set_page_readwrite(ptable); #else /* MACH_PV_PAGETABLES */ - *pdep++ = 0; + *pdep++ = 0; #endif /* MACH_PV_PAGETABLES */ - } while (--i > 0); - } + } while (--i > 0); + } - PMAP_READ_UNLOCK(p, spl); + PMAP_READ_UNLOCK(p, spl); - /* - * And free the pte page itself. - */ - { - vm_page_t m; - - vm_object_lock(pmap_object); - assert(pa == (vm_offset_t) pa); - m = vm_page_lookup(pmap_object, pa); - if (m == VM_PAGE_NULL) - panic("pmap_collect: pte page not in object"); - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - vm_page_unlock_queues(); - vm_object_unlock(pmap_object); - } + /* + * And free the pte page itself. + */ + kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte)); - PMAP_READ_LOCK(p, spl); - } - } - } + PMAP_READ_LOCK(p, spl); + + } + } #if PAE + // TODO check l2? + } +#ifdef __x86_64__ + // TODO check l3? } -#endif +#endif /* __x86_64__ */ +#endif /* PAE */ + PMAP_UPDATE_TLBS(p, VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS); PMAP_READ_UNLOCK(p, spl); diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h index 4c1b9bd5..5fc7fb25 100644 --- a/i386/intel/pmap.h +++ b/i386/intel/pmap.h @@ -75,7 +75,6 @@ typedef phys_addr_t pt_entry_t; #define L4SHIFT 39 /* L4 shift */ #define L4MASK 0x1ff /* mask for L4 index */ #define PDPNUM_KERNEL (((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) + 1) -#define PDPNUM_USER (((VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS) >> PDPSHIFT) + 1) #define PDPMASK 0x1ff /* mask for page directory pointer index */ #else /* __x86_64__ */ #define PDPNUM 4 /* number of page directory pointers */ @@ -130,6 +129,26 @@ typedef phys_addr_t pt_entry_t; */ #define pdenum2lin(a) ((vm_offset_t)(a) << PDESHIFT) +#if PAE +#ifdef __x86_64__ +#define pagenum2lin(l4num, l3num, l2num, l1num) \ + (((vm_offset_t)(l4num) << L4SHIFT) + \ + ((vm_offset_t)(l3num) << PDPSHIFT) + \ + ((vm_offset_t)(l2num) << PDESHIFT) + \ + ((vm_offset_t)(l1num) << PTESHIFT)) +#else /* __x86_64__ */ +#define pagenum2lin(l4num, l3num, l2num, l1num) \ + (((vm_offset_t)(l3num) << PDPSHIFT) + \ + ((vm_offset_t)(l2num) << PDESHIFT) + \ + ((vm_offset_t)(l1num) << PTESHIFT)) +#endif +#else /* PAE */ +#define pagenum2lin(l4num, l3num, l2num, l1num) \ + (((vm_offset_t)(l2num) << PDESHIFT) + \ + ((vm_offset_t)(l1num) << PTESHIFT)) +#endif + + /* * Convert linear offset to page table index */ -- 2.30.2