On PPC_MMU_NOHASH processors that support a large number of contexts,
implement a lazy flush_tlb_mm() that switches the mm to a free context and
marks the old one stale. The TLB is only flushed when no free contexts are
available.
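To make the mechanism easier to follow before reading the diff, here is a
rough user-space sketch of the idea. It is an illustration only, not part of
the patch: the names NCTX, ctx_map, avail_map, lazy_flush() and recycle() are
invented, and it models a single mm on a single CPU, so the stale-map and IPI
handling in the real code is left out.

/*
 * Illustration only (not part of the patch, and not kernel code).  A
 * user-space model of the lazy-flush bookkeeping, assuming a single mm
 * and a tiny context space.  ctx_map stands in for context_map (live
 * contexts), avail_map for context_available_map (live or stale),
 * lazy_flush() for lazy_flush_context() and recycle() for
 * recycle_stale_contexts().
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define NCTX 16                         /* tiny context space for the demo */

static unsigned char ctx_map[NCTX];     /* 1 = live context */
static unsigned char avail_map[NCTX];   /* 1 = live or stale context */
static int nr_free = NCTX, nr_stale;

static void recycle(void)               /* the deferred "TLB flush" */
{
        if (nr_free == 0 && nr_stale > 0) {
                printf("  full TLB flush, recycling %d stale contexts\n",
                       nr_stale);
                memcpy(avail_map, ctx_map, sizeof(avail_map));
                nr_free = nr_stale;
                nr_stale = 0;
        }
}

static int lazy_flush(int old)          /* the cheap flush_tlb_mm() path */
{
        int id;

        ctx_map[old] = 0;               /* old context becomes stale */
        nr_stale++;
        recycle();                      /* only flushes if we ran dry */

        for (id = 0; id < NCTX && avail_map[id]; id++)
                ;                       /* find a fresh context */
        assert(id < NCTX);              /* recycle() guarantees one exists */
        avail_map[id] = ctx_map[id] = 1;
        nr_free--;
        return id;
}

int main(void)
{
        int i, id = 0;

        avail_map[0] = ctx_map[0] = 1;  /* start with one live context */
        nr_free--;

        for (i = 0; i < 40; i++) {      /* forty flush_tlb_mm() calls */
                id = lazy_flush(id);
                printf("flush %2d -> context %2d (free %2d, stale %2d)\n",
                       i, id, nr_free, nr_stale);
        }
        return 0;
}

Built with any C compiler, the sketch prints a cheap context switch per
"flush", with a full recycle only once the sixteen demo contexts are
exhausted; with the 65535 contexts available on 47x, that full flush becomes
correspondingly rare.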
The lazy tlb flushing is controlled by the global variable tlb_lazy_flush
which is set during init, dependent upon MMU_FTR_TYPE_47x.

Signed-off-by: Dave Kleikamp <sha...@linux.vnet.ibm.com>
---
 arch/powerpc/mm/mmu_context_nohash.c |  154 +++++++++++++++++++++++++++++++---
 arch/powerpc/mm/mmu_decl.h           |    8 ++
 arch/powerpc/mm/tlb_nohash.c         |   28 +++++-
 3 files changed, 174 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index ddfd7ad..87c7dc2 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -17,10 +17,6 @@
  * TODO:
  *
  *   - The global context lock will not scale very well
- *   - The maps should be dynamically allocated to allow for processors
- *     that support more PID bits at runtime
- *   - Implement flush_tlb_mm() by making the context stale and picking
- *     a new one
  *   - More aggressively clear stale map bits and maybe find some way to
  *     also clear mm->cpu_vm_mask bits when processes are migrated
  */
@@ -52,6 +48,8 @@
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
 
+#include "mmu_decl.h"
+
 static unsigned int first_context, last_context;
 static unsigned int next_context, nr_free_contexts;
 static unsigned long *context_map;
@@ -59,9 +57,31 @@ static unsigned long *stale_map[NR_CPUS];
 static struct mm_struct **context_mm;
 static DEFINE_RAW_SPINLOCK(context_lock);
 
+int tlb_lazy_flush;
+static int tlb_needs_flush[NR_CPUS];
+static unsigned long *context_available_map;
+static unsigned int nr_stale_contexts;
+
 #define CTX_MAP_SIZE	\
 	(sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
 
+/*
+ * if another cpu recycled the stale contexts, we need to flush
+ * the local TLB, so that we may re-use those contexts
+ */
+void flush_recycled_contexts(int cpu)
+{
+	int i;
+
+	if (tlb_needs_flush[cpu]) {
+		pr_hard("[%d] flushing tlb\n", cpu);
+		_tlbil_all();
+		for (i = cpu_first_thread_in_core(cpu);
+		     i <= cpu_last_thread_in_core(cpu); i++) {
+			tlb_needs_flush[i] = 0;
+		}
+	}
+}
 
 /* Steal a context from a task that has one at the moment.
  *
@@ -147,7 +167,7 @@ static unsigned int steal_context_up(unsigned int id)
 	pr_hardcont(" | steal %d from 0x%p", id, mm);
 
 	/* Flush the TLB for that context */
-	local_flush_tlb_mm(mm);
+	__local_flush_tlb_mm(mm);
 
 	/* Mark this mm has having no context anymore */
 	mm->context.id = MMU_NO_CONTEXT;
@@ -161,13 +181,19 @@
 #ifdef DEBUG_MAP_CONSISTENCY
 static void context_check_map(void)
 {
-	unsigned int id, nrf, nact;
+	unsigned int id, nrf, nact, nstale;
 
-	nrf = nact = 0;
+	nrf = nact = nstale = 0;
 	for (id = first_context; id <= last_context; id++) {
 		int used = test_bit(id, context_map);
-		if (!used)
-			nrf++;
+		int allocated = tlb_lazy_flush &&
+			test_bit(id, context_available_map);
+		if (!used) {
+			if (allocated)
+				nstale++;
+			else
+				nrf++;
+		}
 		if (used != (context_mm[id] != NULL))
 			pr_err("MMU: Context %d is %s and MM is %p !\n",
 			       id, used ? "used" : "free", context_mm[id]);
@@ -179,6 +205,11 @@ static void context_check_map(void)
 		       nr_free_contexts, nrf);
 		nr_free_contexts = nrf;
 	}
+	if (nstale != nr_stale_contexts) {
+		pr_err("MMU: Stale context count out of sync ! (%d vs %d)\n",
+		       nr_stale_contexts, nstale);
+		nr_stale_contexts = nstale;
+	}
 	if (nact > num_online_cpus())
 		pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
 		       nact, num_online_cpus());
@@ -189,6 +220,38 @@ static void context_check_map(void)
 static void context_check_map(void) { }
 #endif
 
+/*
+ * On architectures that support a large number of contexts, the tlb
+ * can be flushed lazily by picking a new context and making the stale
+ * context unusable until a lazy tlb flush has been issued.
+ *
+ * context_available_map keeps track of both active and stale contexts,
+ * while context_map continues to track only active contexts. When the
+ * lazy tlb flush is triggered, context_map is copied to
+ * context_available_map, making the once-stale contexts available again
+ */
+static void recycle_stale_contexts(void)
+{
+	if (nr_free_contexts == 0 && nr_stale_contexts > 0) {
+		unsigned int cpu = smp_processor_id();
+		unsigned int i;
+
+		pr_hard("[%d] recycling stale contexts\n", cpu);
+		/* Time to flush the TLB's */
+		memcpy(context_available_map, context_map, CTX_MAP_SIZE);
+		nr_free_contexts = nr_stale_contexts;
+		nr_stale_contexts = 0;
+		for_each_online_cpu(i) {
+			if ((i < cpu_first_thread_in_core(cpu)) ||
+			    (i > cpu_last_thread_in_core(cpu)))
+				tlb_needs_flush[i] = 1;
+			else
+				tlb_needs_flush[i] = 0;	/* This core */
+		}
+		_tlbil_all();
+	}
+}
+
 void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 {
 	unsigned int i, id, cpu = smp_processor_id();
@@ -197,6 +260,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 	/* No lockless fast path .. yet */
 	raw_spin_lock(&context_lock);
 
+	flush_recycled_contexts(cpu);
+
 	pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
 		cpu, next, next->context.active, next->context.id);
 
@@ -227,7 +292,12 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 	id = next_context;
 	if (id > last_context)
 		id = first_context;
-	map = context_map;
+
+	if (tlb_lazy_flush) {
+		recycle_stale_contexts();
+		map = context_available_map;
+	} else
+		map = context_map;
 
 	/* No more free contexts, let's try to steal one */
 	if (nr_free_contexts == 0) {
@@ -250,6 +320,13 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 		if (id > last_context)
 			id = first_context;
 	}
+	if (tlb_lazy_flush)
+		/*
+		 * In the while loop above, we set the bit in
+		 * context_available_map, it also needs to be set in
+		 * context_map
+		 */
+		__set_bit(id, context_map);
 stolen:
 	next_context = id + 1;
 	context_mm[id] = next;
@@ -267,7 +344,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 			    id, cpu_first_thread_in_core(cpu),
 			    cpu_last_thread_in_core(cpu));
 
-		local_flush_tlb_mm(next);
+		__local_flush_tlb_mm(next);
 
 		/* XXX This clear should ultimately be part of local_flush_tlb_mm */
 		for (i = cpu_first_thread_in_core(cpu);
@@ -317,11 +394,61 @@ void destroy_context(struct mm_struct *mm)
 		mm->context.active = 0;
 #endif
 		context_mm[id] = NULL;
-		nr_free_contexts++;
+
+		if (tlb_lazy_flush)
+			nr_stale_contexts++;
+		else
+			nr_free_contexts++;
 	}
 	raw_spin_unlock_irqrestore(&context_lock, flags);
 }
 
+/*
+ * This is called from flush_tlb_mm(). Mark the current context as stale
+ * and grab an available one. The tlb will be flushed when no more
+ * contexts are available
+ */
+void lazy_flush_context(struct mm_struct *mm)
+{
+	unsigned int id;
+	unsigned long flags;
+	unsigned long *map;
+
+	raw_spin_lock_irqsave(&context_lock, flags);
+
+	id = mm->context.id;
+	if (unlikely(id == MMU_NO_CONTEXT))
+		goto no_context;
+
+	/*
+	 * Make the existing context stale. It remains in
+	 * context_available_map as long as nr_free_contexts remains non-zero
+	 */
+	__clear_bit(id, context_map);
+	context_mm[id] = NULL;
+	nr_stale_contexts++;
+
+	recycle_stale_contexts();
+	BUG_ON(nr_free_contexts == 0);
+
+	nr_free_contexts--;
+	id = last_context;
+	map = context_available_map;
+	while (__test_and_set_bit(id, map)) {
+		id = find_next_zero_bit(map, last_context+1, id);
+		if (id > last_context)
+			id = first_context;
+	}
+	set_bit(id, context_map);
+	next_context = id + 1;
+	context_mm[id] = mm;
+	mm->context.id = id;
+	if (current->active_mm == mm)
+		set_context(id, mm->pgd);
+no_context:
+	raw_spin_unlock_irqrestore(&context_lock, flags);
+}
+
 #ifdef CONFIG_SMP
 
 static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
@@ -407,6 +534,7 @@ void __init mmu_context_init(void)
 	} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
 		first_context = 1;
 		last_context = 65535;
+		tlb_lazy_flush = 1;
 	} else {
 		first_context = 1;
 		last_context = 255;
@@ -419,6 +547,8 @@ void __init mmu_context_init(void)
 	 * Allocate the maps used by context management
 	 */
 	context_map = alloc_bootmem(CTX_MAP_SIZE);
+	if (tlb_lazy_flush)
+		context_available_map = alloc_bootmem(CTX_MAP_SIZE);
 	context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
 
 	stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 63b84a0..64240f1 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -25,6 +25,14 @@
 #ifdef CONFIG_PPC_MMU_NOHASH
 
 /*
+ * Lazy tlb flush
+ */
+extern int tlb_lazy_flush;
+extern void flush_recycled_contexts(int);
+void lazy_flush_context(struct mm_struct *mm);
+void __local_flush_tlb_mm(struct mm_struct *mm);
+
+/*
  * On 40x and 8xx, we directly inline tlbia and tlbivax
  */
 #if defined(CONFIG_40x) || defined(CONFIG_8xx)
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index fe391e9..264d0ea 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -36,6 +36,7 @@
 #include <linux/spinlock.h>
 #include <linux/memblock.h>
 
+#include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/code-patching.h>
@@ -117,7 +118,7 @@ unsigned long linear_map_top;	/* Top of linear mapping */
 /*
  * These are the base non-SMP variants of page and mm flushing
  */
-void local_flush_tlb_mm(struct mm_struct *mm)
+void __local_flush_tlb_mm(struct mm_struct *mm)
 {
 	unsigned int pid;
 
@@ -127,6 +128,14 @@ void local_flush_tlb_mm(struct mm_struct *mm)
 		_tlbil_pid(pid);
 	preempt_enable();
 }
+
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	if (tlb_lazy_flush)
+		lazy_flush_context(mm);
+	else
+		__local_flush_tlb_mm(mm);
+}
 EXPORT_SYMBOL(local_flush_tlb_mm);
 
 void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
@@ -166,13 +175,19 @@ struct tlb_flush_param {
 	unsigned int pid;
 	unsigned int tsize;
 	unsigned int ind;
+	struct mm_struct *mm;
 };
 
 static void do_flush_tlb_mm_ipi(void *param)
 {
 	struct tlb_flush_param *p = param;
 
-	_tlbil_pid(p ? p->pid : 0);
+	if (tlb_lazy_flush && p) {
+		flush_recycled_contexts(smp_processor_id());
+		if (current->active_mm == p->mm)
+			set_context(p->pid, p->mm->pgd);
+	} else
+		_tlbil_pid(p ? p->pid : 0);
 }
 
 static void do_flush_tlb_page_ipi(void *param)
@@ -207,13 +222,18 @@ void flush_tlb_mm(struct mm_struct *mm)
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		goto no_context;
+	if (tlb_lazy_flush) {
+		lazy_flush_context(mm);
+		pid = mm->context.id;
+	}
 	if (!mm_is_core_local(mm)) {
-		struct tlb_flush_param p = { .pid = pid };
+		struct tlb_flush_param p = { .pid = pid, .mm = mm };
 		/* Ignores smp_processor_id() even if set. */
 		smp_call_function_many(mm_cpumask(mm),
 				       do_flush_tlb_mm_ipi, &p, 1);
 	}
-	_tlbil_pid(pid);
+	if (!tlb_lazy_flush)
+		_tlbil_pid(pid);
 no_context:
 	preempt_enable();
 }
-- 
1.7.2.2

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev