On 09/04/16 16:13, Aneesh Kumar K.V wrote:
> Core kernel don't track the page size of the va range that we are
> invalidating. Hence we end up flushing tlb for the entire mm here.
> Later patches will improve this.
>
> We also don't flush page walk cache separetly instead use RIC=2 when
> flushing tlb, because we do a mmu gather flush after freeing page table.
>
> MMU_NO_CONTEXT is updated for hash.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/mmu-hash.h      |   1 +
>  arch/powerpc/include/asm/book3s/64/tlbflush-hash.h |  13 +-
>  .../powerpc/include/asm/book3s/64/tlbflush-radix.h |  33 +++
>  arch/powerpc/include/asm/book3s/64/tlbflush.h      |  20 ++
>  arch/powerpc/include/asm/tlbflush.h                |   1 +
>  arch/powerpc/mm/Makefile                           |   2 +-
>  arch/powerpc/mm/tlb-radix.c                        | 243 +++++++++++++++++++++
>  7 files changed, 308 insertions(+), 5 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
>  create mode 100644 arch/powerpc/mm/tlb-radix.c
>
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> index 7da61b85406b..290157e8d5b2 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> @@ -119,6 +119,7 @@
>  #define POWER7_TLB_SETS		128	/* # sets in POWER7 TLB */
>  #define POWER8_TLB_SETS		512	/* # sets in POWER8 TLB */
>  #define POWER9_TLB_SETS_HASH	256	/* # sets in POWER9 TLB Hash mode */
> +#define POWER9_TLB_SETS_RADIX	128	/* # sets in POWER9 TLB Radix mode */
>
>  #ifndef __ASSEMBLY__
>
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
> index ddce8477fe0c..e90310d1a519 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
> @@ -1,8 +1,6 @@
>  #ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
>  #define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
>
> -#define MMU_NO_CONTEXT	0
> -
>  /*
>   * TLB flushing for 64-bit hash-MMU CPUs
>   */
> @@ -29,14 +27,21 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
>
>  static inline void arch_enter_lazy_mmu_mode(void)
>  {
> -	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
> +	struct ppc64_tlb_batch *batch;
>
> +	if (radix_enabled())
> +		return;
> +	batch = this_cpu_ptr(&ppc64_tlb_batch);
>  	batch->active = 1;
>  }
>
>  static inline void arch_leave_lazy_mmu_mode(void)
>  {
> -	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
> +	struct ppc64_tlb_batch *batch;
> +
> +	if (radix_enabled())
> +		return;
> +	batch = this_cpu_ptr(&ppc64_tlb_batch);
>

Are we better off doing

#ifdef CONFIG_RADIX_MMU
static inline arch_enter_lazy_mmu(...)
{
	Actual code for HASH PTEs
}
#else
static inline arch_enter_lazy_mmu(...)
{
}
#endif

Unless you need a runtime switch -- which means we need both HPTE/RADIX
to co-exist.
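For illustration, a fleshed-out version of that compile-time split could look
roughly like the sketch below. This is only a sketch, not the patch as posted:
it assumes the Kconfig symbol is CONFIG_PPC_RADIX_MMU (as used in the Makefile
hunk further down) and that hash and radix never need to coexist in one
kernel, so the radix build gets empty stubs and the existing hash bodies live
in the #else branch.

```c
/*
 * Sketch of the compile-time alternative, assuming CONFIG_PPC_RADIX_MMU
 * selects a radix-only build. Not part of the posted patch.
 */
#ifdef CONFIG_PPC_RADIX_MMU
static inline void arch_enter_lazy_mmu_mode(void) { }
static inline void arch_leave_lazy_mmu_mode(void) { }
#else
static inline void arch_enter_lazy_mmu_mode(void)
{
	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);

	batch->active = 1;
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);

	if (batch->index)
		__flush_tlb_pending(batch);
	batch->active = 0;
}
#endif
```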
>  	if (batch->index)
>  		__flush_tlb_pending(batch);
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> new file mode 100644
> index 000000000000..584ffa0a331f
> --- /dev/null
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> @@ -0,0 +1,33 @@
> +#ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H
> +#define _ASM_POWERPC_TLBFLUSH_RADIX_H
> +
> +struct vm_area_struct;
> +struct mm_struct;
> +struct mmu_gather;
> +
> +static inline int mmu_get_ap(int psize)
> +{
> +	return mmu_psize_defs[psize].ap;
> +}
> +

Why the abstraction? The previous patches happily used
mmu_psize_defs[psize].YYY directly.

> +extern void flush_rtlb_range(struct vm_area_struct *vma, unsigned long start,
> +			     unsigned long end);
> +extern void flush_rtlb_kernel_range(unsigned long start, unsigned long end);
> +
> +extern void local_flush_rtlb_mm(struct mm_struct *mm);
> +extern void local_flush_rtlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
> +extern void __local_flush_rtlb_page(struct mm_struct *mm, unsigned long vmaddr,
> +				    unsigned long ap, int nid);
> +extern void rtlb_flush(struct mmu_gather *tlb);
> +#ifdef CONFIG_SMP
> +extern void flush_rtlb_mm(struct mm_struct *mm);
> +extern void flush_rtlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
> +extern void __flush_rtlb_page(struct mm_struct *mm, unsigned long vmaddr,
> +			      unsigned long ap, int nid);
> +#else
> +#define flush_rtlb_mm(mm)		local_flush_rtlb_mm(mm)
> +#define flush_rtlb_page(vma,addr)	local_flush_rtlb_page(vma,addr)
> +#define __flush_rtlb_page(mm,addr,p,i)	__local_flush_rtlb_page(mm,addr,p,i)
> +#endif
> +
> +#endif
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
> index 37d7f289ad42..66b7bc371491 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
> @@ -1,51 +1,71 @@
>  #ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
>  #define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
>
> +#define MMU_NO_CONTEXT	~0UL
> +
> +
>  #include <asm/book3s/64/tlbflush-hash.h>
> +#include <asm/book3s/64/tlbflush-radix.h>
>
>  static inline void flush_tlb_range(struct vm_area_struct *vma,
>  				   unsigned long start, unsigned long end)
>  {
> +	if (radix_enabled())
> +		return flush_rtlb_range(vma, start, end);
>  	return flush_hltlb_range(vma, start, end);
>  }
>
>  static inline void flush_tlb_kernel_range(unsigned long start,
>  					  unsigned long end)
>  {
> +	if (radix_enabled())
> +		return flush_rtlb_kernel_range(start, end);
>  	return flush_hltlb_kernel_range(start, end);
>  }
>
>  static inline void local_flush_tlb_mm(struct mm_struct *mm)
>  {
> +	if (radix_enabled())
> +		return local_flush_rtlb_mm(mm);
>  	return local_flush_hltlb_mm(mm);
>  }
>
>  static inline void local_flush_tlb_page(struct vm_area_struct *vma,
>  					unsigned long vmaddr)
>  {
> +	if (radix_enabled())
> +		return local_flush_rtlb_page(vma, vmaddr);
>  	return local_flush_hltlb_page(vma, vmaddr);
>  }
>
>  static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
>  					 unsigned long vmaddr)
>  {
> +	if (radix_enabled())
> +		return flush_rtlb_page(vma, vmaddr);
>  	return flush_hltlb_page_nohash(vma, vmaddr);
>  }
>
>  static inline void tlb_flush(struct mmu_gather *tlb)
>  {
> +	if (radix_enabled())
> +		return rtlb_flush(tlb);
>  	return hltlb_flush(tlb);
>  }
>
>  #ifdef CONFIG_SMP
>  static inline void flush_tlb_mm(struct mm_struct *mm)
>  {
> +	if (radix_enabled())
> +		return flush_rtlb_mm(mm);
>  	return flush_hltlb_mm(mm);
>  }
>
>  static inline void flush_tlb_page(struct vm_area_struct *vma,
>  				  unsigned long vmaddr)
>  {
> +	if (radix_enabled())
> +		return flush_rtlb_page(vma, vmaddr);
>  	return flush_hltlb_page(vma, vmaddr);
>  }
>  #else
> diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
> index 2fc4331c5bc5..1b38eea28e5a 100644
> --- a/arch/powerpc/include/asm/tlbflush.h
> +++ b/arch/powerpc/include/asm/tlbflush.h
> @@ -58,6 +58,7 @@ extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
>
>  #elif defined(CONFIG_PPC_STD_MMU_32)
>
> +#define MMU_NO_CONTEXT      (0)
>  /*
>   * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
>   */
> diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
> index 9589236028f4..48aa11ae6a6b 100644
> --- a/arch/powerpc/mm/Makefile
> +++ b/arch/powerpc/mm/Makefile
> @@ -15,7 +15,7 @@ obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(CONFIG_WORD_SIZE)e.o
>  hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
>  obj-$(CONFIG_PPC_BOOK3E_64)	+= pgtable-book3e.o
>  obj-$(CONFIG_PPC_STD_MMU_64)	+= pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o
> -obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o
> +obj-$(CONFIG_PPC_RADIX_MMU)	+= pgtable-radix.o tlb-radix.o
>  obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
>  obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(CONFIG_WORD_SIZE).o
>  ifeq ($(CONFIG_PPC_STD_MMU_64),y)
> diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
> new file mode 100644
> index 000000000000..9129c0d6322c
> --- /dev/null
> +++ b/arch/powerpc/mm/tlb-radix.c
> @@ -0,0 +1,243 @@
> +/*
> + * TLB flush routines for radix kernels.
> + *
> + * Copyright (C) 2015 Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + *
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/hugetlb.h>
> +#include <linux/memblock.h>
> +
> +#include <asm/tlb.h>
> +#include <asm/tlbflush.h>
> +
> +static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
> +
> +static inline void __tlbiel_pid(unsigned long pid, int set)
> +{
> +	unsigned long rb,rs,ric,prs,r;
> +
> +	rb = PPC_BIT(53); /* IS = 1 */
> +	rb |= set << PPC_BITLSHIFT(51);

Should we mask the set? set & cpu_to_be64(0x0000000000fff000)?

> +	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
> +	prs = 1; /* process scoped */
> +	r = 1;   /* raidx format */
> +	ric = 2; /* invalidate all the caches */
> +
> +	asm volatile("ptesync": : :"memory");
> +	asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"

Can we have a usable name for the opcode? I know compilers might not support
it yet, but a #define READABLE_OPCODE 0x7c000224 would help.
BTW, does this opcode work for both endians?

> +		     "(%2 << 17) | (%3 << 18) | (%4 << 21)"
> +		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
> +	asm volatile("ptesync": : :"memory");
> +}
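Pulling the two comments above together, something along these lines is what I
have in mind. This is purely illustrative: PPC_INST_TLBIEL and TLBIEL_SET_MASK
are made-up names (nothing in the tree defines them), and the 12-bit width of
the set field is an assumption; the point is only to mask the set number and
to give the hand-encoded .long constant a readable name.

```c
#include <linux/stringify.h>

/* Illustrative names only, not existing defines. */
#define PPC_INST_TLBIEL		0x7c000224	/* tlbiel, opcode spelled out */
#define TLBIEL_SET_MASK		0xfffUL		/* assumes a 12-bit set field */

static inline void __tlbiel_pid(unsigned long pid, int set)
{
	unsigned long rb, rs, ric, prs, r;

	rb  = PPC_BIT(53);		/* IS = 1 */
	/* mask the set number before merging it into RB */
	rb |= ((unsigned long)set & TLBIEL_SET_MASK) << PPC_BITLSHIFT(51);
	rs  = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1;			/* process scoped */
	r   = 1;			/* radix format */
	ric = 2;			/* invalidate all the caches */

	asm volatile("ptesync" : : : "memory");
	/* __stringify() expands the named opcode into the .long directive */
	asm volatile(".long " __stringify(PPC_INST_TLBIEL) " | (%0 << 11) | (%1 << 16) |"
		     "(%2 << 17) | (%3 << 18) | (%4 << 21)"
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	asm volatile("ptesync" : : : "memory");
}
```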
> +
> +/*
> + * We use 128 set in radix mode and 256 set in hpt mode.
> + */

Why?

> +static inline void _tlbiel_pid(unsigned long pid)
> +{
> +	int set;
> +
> +	for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
> +		__tlbiel_pid(pid, set);
> +	}
> +	return;
> +}
> +
> +static inline void _tlbie_pid(unsigned long pid)
> +{
> +	unsigned long rb,rs,ric,prs,r;
> +
> +	rb = PPC_BIT(53); /* IS = 1 */
> +	rs = pid << PPC_BITLSHIFT(31);
> +	prs = 1; /* process scoped */
> +	r = 1;   /* raidx format */
> +	ric = 2; /* invalidate all the caches */
> +
> +	asm volatile("ptesync": : :"memory");
> +	asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
> +		     "(%2 << 17) | (%3 << 18) | (%4 << 21)"
> +		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");

Same comments as above.

> +	asm volatile("eieio; tlbsync; ptesync": : :"memory");
> +}
> +
> +static inline void _tlbiel_va(unsigned long va, unsigned long pid,
> +			      unsigned long ap)
> +{
> +	unsigned long rb,rs,ric,prs,r;
> +
> +	rb = va & ~(PPC_BITMASK(52, 63));
> +	rb |= ap << PPC_BITLSHIFT(58);
> +	rs = pid << PPC_BITLSHIFT(31);
> +	prs = 1; /* process scoped */
> +	r = 1;   /* raidx format */

^^ radix

> +	ric = 0; /* no cluster flush yet */
> +

IS should be explicitly set to 0 here.

> +	asm volatile("ptesync": : :"memory");
> +	asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
> +		     "(%2 << 17) | (%3 << 18) | (%4 << 21)"
> +		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");

Ditto.

> +	asm volatile("ptesync": : :"memory");
> +}
> +
> +static inline void _tlbie_va(unsigned long va, unsigned long pid,
> +			     unsigned long ap)
> +{
> +	unsigned long rb,rs,ric,prs,r;
> +
> +	rb = va & ~(PPC_BITMASK(52, 63));
> +	rb |= ap << PPC_BITLSHIFT(58);
> +	rs = pid << PPC_BITLSHIFT(31);
> +	prs = 1; /* process scoped */
> +	r = 1;   /* raidx format */

^^ radix

> +	ric = 0; /* no cluster flush yet */
> +
> +	asm volatile("ptesync": : :"memory");
> +	asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
> +		     "(%2 << 17) | (%3 << 18) | (%4 << 21)"
> +		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");

Same as above.

> +	asm volatile("eieio; tlbsync; ptesync": : :"memory");
> +}
> +
> +/*
> + * Base TLB flushing operations:
> + *
> + *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
> + *  - flush_tlb_page(vma, vmaddr) flushes one page
> + *  - flush_tlb_range(vma, start, end) flushes a range of pages
> + *  - flush_tlb_kernel_range(start, end) flushes kernel pages
> + *
> + *  - local_* variants of page and mm only apply to the current
> + *    processor
> + */
> +void local_flush_rtlb_mm(struct mm_struct *mm)
> +{
> +	unsigned int pid;
> +
> +	preempt_disable();
> +	pid = mm->context.id;
> +	if (pid != MMU_NO_CONTEXT)
> +		_tlbiel_pid(pid);
> +	preempt_enable();
> +}
> +EXPORT_SYMBOL(local_flush_rtlb_mm);
> +
> +void __local_flush_rtlb_page(struct mm_struct *mm, unsigned long vmaddr,
> +			     unsigned long ap, int nid)
> +{
> +	unsigned int pid;
> +
> +	preempt_disable();
> +	pid = mm ? mm->context.id : 0;
> +	if (pid != MMU_NO_CONTEXT)
> +		_tlbiel_va(vmaddr, pid, ap);
> +	preempt_enable();
> +}
> +
> +void local_flush_rtlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
> +{
> +	__local_flush_rtlb_page(vma ? vma->vm_mm : NULL, vmaddr,
> +				mmu_get_ap(mmu_virtual_psize), 0);
> +}
> +EXPORT_SYMBOL(local_flush_rtlb_page);
> +
> +#ifdef CONFIG_SMP
> +static int mm_is_core_local(struct mm_struct *mm)
> +{
> +	return cpumask_subset(mm_cpumask(mm),
> +			      topology_sibling_cpumask(smp_processor_id()));
> +}

A comment should say that this must be called with preemption disabled
(preempt_disable()).
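Something like the following, perhaps. The wording is only a suggestion; the
function body itself is unchanged from the patch.

```c
/*
 * Callers must have preemption disabled: smp_processor_id() is used below
 * and the result must stay valid for the duration of the flush.
 */
static int mm_is_core_local(struct mm_struct *mm)
{
	return cpumask_subset(mm_cpumask(mm),
			      topology_sibling_cpumask(smp_processor_id()));
}
```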
> +
> +void flush_rtlb_mm(struct mm_struct *mm)
> +{
> +	unsigned int pid;
> +
> +	preempt_disable();
> +	pid = mm->context.id;
> +	if (unlikely(pid == MMU_NO_CONTEXT))
> +		goto no_context;

Why did we flush from this context? Is this common?

> +
> +	if (!mm_is_core_local(mm)) {
> +		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
> +

I think any radix CPU will support this feature -- no?

> +		if (lock_tlbie)
> +			raw_spin_lock(&native_tlbie_lock);
> +		_tlbie_pid(pid);
> +		if (lock_tlbie)
> +			raw_spin_unlock(&native_tlbie_lock);
> +	} else
> +		_tlbiel_pid(pid);
> +no_context:
> +	preempt_enable();
> +}
> +EXPORT_SYMBOL(flush_rtlb_mm);
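If the answer to that is yes, i.e. MMU_FTR_LOCKLESS_TLBIE can be taken for
granted on every radix-capable CPU, the function above could presumably drop
the lock altogether. A rough sketch of that simplification (not something the
patch proposes, just to show what falls out under that assumption):

```c
/* Hypothetical simplification, assuming tlbie needs no global lock on radix. */
void flush_rtlb_mm(struct mm_struct *mm)
{
	unsigned int pid;

	preempt_disable();
	pid = mm->context.id;
	if (likely(pid != MMU_NO_CONTEXT)) {
		if (!mm_is_core_local(mm))
			_tlbie_pid(pid);	/* broadcast invalidation */
		else
			_tlbiel_pid(pid);	/* local invalidation only */
	}
	preempt_enable();
}
```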
> +
> +void __flush_rtlb_page(struct mm_struct *mm, unsigned long vmaddr,
> +		       unsigned long ap, int nid)
> +{
> +	unsigned int pid;
> +
> +	preempt_disable();
> +	pid = mm ? mm->context.id : 0;
> +	if (unlikely(pid == MMU_NO_CONTEXT))
> +		goto bail;

bail here and no_context above?

> +	if (!mm_is_core_local(mm)) {
> +		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
> +
> +		if (lock_tlbie)
> +			raw_spin_lock(&native_tlbie_lock);
> +		_tlbie_va(vmaddr, pid, ap);
> +		if (lock_tlbie)
> +			raw_spin_unlock(&native_tlbie_lock);
> +	} else
> +		_tlbiel_va(vmaddr, pid, ap);
> +bail:
> +	preempt_enable();
> +}
> +
> +void flush_rtlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
> +{
> +	__flush_rtlb_page(vma ? vma->vm_mm : NULL, vmaddr,
> +			  mmu_get_ap(mmu_virtual_psize), 0);
> +}
> +EXPORT_SYMBOL(flush_rtlb_page);
> +
> +#endif /* CONFIG_SMP */
> +
> +void flush_rtlb_kernel_range(unsigned long start, unsigned long end)
> +{
> +	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
> +
> +	if (lock_tlbie)
> +		raw_spin_lock(&native_tlbie_lock);
> +	_tlbie_pid(0);

Oh! So PID can be 0 for vmalloc'ed regions?

> +	if (lock_tlbie)
> +		raw_spin_unlock(&native_tlbie_lock);
> +}
> +EXPORT_SYMBOL(flush_rtlb_kernel_range);
> +
> +/*
> + * Currently, for range flushing, we just do a full mm flush. Because
> + * we use this in code path where we don' track the page size.
> + */
> +void flush_rtlb_range(struct vm_area_struct *vma, unsigned long start,
> +		      unsigned long end)
> +
> +{
> +	struct mm_struct *mm = vma->vm_mm;
> +	flush_rtlb_mm(mm);
> +}
> +EXPORT_SYMBOL(flush_rtlb_range);
> +
> +
> +void rtlb_flush(struct mmu_gather *tlb)
> +{
> +	struct mm_struct *mm = tlb->mm;
> +	flush_rtlb_mm(mm);
> +}

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev