This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required page table functions.
This will not enable huge iomaps, because powerpc/64 ioremap does not call ioremap_page_range. That is done in a later change. HAVE_ARCH_HUGE_VMAP facilities will be used to enable huge pages for vmalloc memory in a set of generic kernel changes. Combined, this improves cached `git diff` performance by about 5% on a 2-node POWER9 with 32MB dentry cache hash, by allowing the dentry/inode hashes to be mapped with 2MB pages: Profiling git diff dTLB misses with a vanilla kernel: 81.75% git [kernel.vmlinux] [k] __d_lookup_rcu 7.21% git [kernel.vmlinux] [k] strncpy_from_user 1.77% git [kernel.vmlinux] [k] find_get_entry 1.59% git [kernel.vmlinux] [k] kmem_cache_free 40,168 dTLB-miss 0.100342754 seconds time elapsed With powerpc huge vmap and generic huge vmap vmalloc: 2,987 dTLB-miss 0.095933138 seconds time elapsed Signed-off-by: Nicholas Piggin <npig...@gmail.com> --- arch/powerpc/Kconfig | 1 + arch/powerpc/mm/book3s64/radix_pgtable.c | 93 ++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8c1c636308c8..f0e5b38d52e8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -167,6 +167,7 @@ config PPC select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if PPC32 select HAVE_ARCH_KGDB diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index c9bcf428dd2b..3bc9ade56277 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1122,3 +1122,96 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, set_pte_at(mm, addr, ptep, pte); } + +int __init arch_ioremap_pud_supported(void) +{ + return radix_enabled(); +} + +int __init arch_ioremap_pmd_supported(void) +{ + return radix_enabled(); +} + +int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) +{ + return 0; +} + +int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) +{ + pte_t *ptep = (pte_t *)pud; + pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot); + + set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud); + + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (pud_huge(*pud)) { + pud_clear(pud); + return 1; + } + + return 0; +} + +int pud_free_pmd_page(pud_t *pud, unsigned long addr) +{ + pmd_t *pmd; + int i; + + pmd = (pmd_t *)pud_page_vaddr(*pud); + pud_clear(pud); + + flush_tlb_kernel_range(addr, addr + PUD_SIZE); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd[i])) { + pte_t *pte; + pte = (pte_t *)pmd_page_vaddr(pmd[i]); + + pte_free_kernel(&init_mm, pte); + } + } + + pmd_free(&init_mm, pmd); + + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) +{ + pte_t *ptep = (pte_t *)pmd; + pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot); + + set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd); + + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (pmd_huge(*pmd)) { + pmd_clear(pmd); + return 1; + } + + return 0; +} + +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) +{ + pte_t *pte; + + pte = (pte_t *)pmd_page_vaddr(*pmd); + pmd_clear(pmd); + + flush_tlb_kernel_range(addr, addr + PMD_SIZE); + + pte_free_kernel(&init_mm, pte); + + return 1; +} -- 2.20.1