On Dec 14, 2008, at 11:44 PM, Benjamin Herrenschmidt wrote:
This patch moves the whole no-hash TLB handling out of line into a
new tlb_nohash.c file, and implements some basic SMP support using
IPIs and/or broadcast tlbivax instructions.
Note that I'm using local invalidations for D->I cache coherency. At
worst, if another processor is trying to execute the same code and still
has the old entry in its TLB, it will just take a fault and re-do the
TLB flush locally (it won't re-do the cache flush in any case).
Signed-off-by: Benjamin Herrenschmidt <b...@kernel.crashing.org>
---
v2. This variant fixes usage of linux/spinlock.h instead of asm/spinlock.h
v3. Inadvertently un-EXPORT_SYMBOL'ed some cache flush calls on ppc64
v4. Fix differences in local_* flush variants between CPU types and
    corresponding clash with highmem code. Remove remaining _tlbie calls
    from nohash code.
arch/powerpc/include/asm/highmem.h | 4
arch/powerpc/include/asm/mmu.h | 3
arch/powerpc/include/asm/tlbflush.h | 84 ++++++--------
arch/powerpc/kernel/misc_32.S | 9 +
arch/powerpc/kernel/ppc_ksyms.c | 6 -
arch/powerpc/mm/Makefile | 2
arch/powerpc/mm/fault.c | 2
arch/powerpc/mm/mem.c | 2
arch/powerpc/mm/tlb_hash32.c | 4
arch/powerpc/mm/tlb_nohash.c | 209 ++++++++++++++++++++++++++++++++++++
10 files changed, 268 insertions(+), 57 deletions(-)
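
To illustrate the D->I coherency point from the description: a call site
would look roughly like the sketch below (illustration only, not taken
from the patch; coherency_fixup_example is a made-up name, while
flush_dcache_icache_page() is the existing powerpc cache-flush helper and
local_flush_tlb_page() is the local-only primitive added here).

/*
 * Sketch: after fixing D/I-cache coherency for a page we are about to
 * execute from, a local-only TLB invalidation is sufficient.  Another
 * CPU holding a stale TLB entry simply takes a fault and flushes its
 * own TLB; the cache flush itself never needs to be repeated.
 */
static void coherency_fixup_example(struct vm_area_struct *vma,
                                    unsigned long addr, struct page *pg)
{
        flush_dcache_icache_page(pg);    /* push D-cache, invalidate I-cache */
        local_flush_tlb_page(vma, addr); /* local CPU only, no IPI/tlbivax */
}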
Index: linux-work/arch/powerpc/mm/tlb_nohash.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/mm/tlb_nohash.c 2008-12-15 14:36:20.000000000 +1100
@@ -0,0 +1,209 @@
+/*
+ * This file contains the routines for TLB flushing.
+ * On machines where the MMU does not use a hash table to store virtual to
+ * physical translations (ie, SW loaded TLBs or Book3E compliant processors,
+ * this does -not- include 603 however which shares the implementation with
+ * hash based processors)
+ *
+ * -- BenH
+ *
+ * Copyright 2008 Ben Herrenschmidt <b...@kernel.crashing.org>
+ * IBM Corp.
+ *
+ * Derived from arch/ppc/mm/init.c:
+ * Copyright (C) 1995-1996 Gary Thomas (g...@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (pau...@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (c...@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/preempt.h>
+#include <linux/spinlock.h>
+
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#include "mmu_decl.h"
+
+/*
+ * Basse TLB flushing operations:
One 's' too many in "Basse".
+ *
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ * - local_* variants of page and mm only apply to the current
+ * processor
+ */
+
+/*
+ * These are the base non-SMP variants of page and mm flushing
+ */
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+ unsigned int pid;
+
+ preempt_disable();
+ pid = mm->context.id;
+ if (pid != MMU_NO_CONTEXT)
+ _tlbil_pid(pid);
+ preempt_enable();
+}
+EXPORT_SYMBOL(local_flush_tlb_mm);
Do these really get called w/MMU_NO_CONTEXT? What is the calling code
trying to flush under those situations?
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ unsigned int pid;
+
+ preempt_disable();
+ pid = vma ? vma->vm_mm->context.id : 0;
+ if (pid != MMU_NO_CONTEXT)
+ _tlbil_va(vmaddr, pid);
+ preempt_enable();
+}
+EXPORT_SYMBOL(local_flush_tlb_page);
+
+
+/*
+ * And here are the SMP non-local implementations
+ */
+#ifdef CONFIG_SMP
+
+static DEFINE_SPINLOCK(tlbivax_lock);
+
+struct tlb_flush_param {
+ unsigned long addr;
+ unsigned int pid;
+};
+
+static void do_flush_tlb_mm_ipi(void *param)
+{
+ struct tlb_flush_param *p = param;
+
+ _tlbil_pid(p ? p->pid : 0);
+}
+
+static void do_flush_tlb_page_ipi(void *param)
+{
+ struct tlb_flush_param *p = param;
+
+ _tlbil_va(p->addr, p->pid);
+}
+
+
+/* Note on invalidations and PID:
+ *
+ * We snapshot the PID with preempt disabled. At this point, it can still
+ * change either because:
+ * - our context is being stolen (PID -> NO_CONTEXT) on another CPU
+ * - we are invalidating some target that isn't currently running here
+ * and is concurrently acquiring a new PID on another CPU
+ * - some other CPU is re-acquiring a lost PID for this mm
+ * etc...
+ *
+ * However, this shouldn't be a problem as we only guarantee
+ * invalidation of TLB entries present prior to this call, so we
+ * don't care about the PID changing, and invalidating a stale PID
+ * is generally harmless.
+ */
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+ cpumask_t cpu_mask;
+ unsigned int pid;
+
+ preempt_disable();
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto no_context;
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+ if (!cpus_empty(cpu_mask)) {
+ struct tlb_flush_param p = { .pid = pid };
+ smp_call_function_mask(cpu_mask, do_flush_tlb_mm_ipi, &p, 1);
+ }
+ _tlbil_pid(pid);
+ no_context:
+ preempt_enable();
+}
+EXPORT_SYMBOL(flush_tlb_mm);
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ cpumask_t cpu_mask;
+ unsigned int pid;
+
+ preempt_disable();
+ pid = vma ? vma->vm_mm->context.id : 0;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto bail;
+ cpu_mask = vma->vm_mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+ if (!cpus_empty(cpu_mask)) {
+ /* If broadcast tlbivax is supported, use it */
+ if (mmu_has_feature(MMU_FTR_HAS_TLBIVAX_BCAST)) {
+ int lock = mmu_has_feature(MMU_FTR_TLBIVAX_NEED_LOCK);
+ if (lock)
+ spin_lock(&tlbivax_lock);
+ _tlbivax_bcast(vmaddr, pid);
+ if (lock)
+ spin_unlock(&tlbivax_lock);
+ goto bail;
+ } else {
+ struct tlb_flush_param p = { .pid = pid, .addr = vmaddr };
+ smp_call_function_mask(cpu_mask,
+ do_flush_tlb_page_ipi, &p, 1);
+ }
+ }
+ _tlbil_va(vmaddr, pid);
+ bail:
+ preempt_enable();
+}
+EXPORT_SYMBOL(flush_tlb_page);
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Flush kernel TLB entries in the given range
+ */
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_SMP
+ preempt_disable();
+ smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
+ _tlbil_pid(0);
+ preempt_enable();
+#else
+ _tlbil_pid(0);
+#endif
+}
+EXPORT_SYMBOL(flush_tlb_kernel_range);
+
+/*
+ * Currently, for range flushing, we just do a full mm flush. This should
+ * be optimized based on a threshold on the size of the range, since
+ * some implementations can stack multiple tlbivax before a tlbsync, but
+ * for now, we keep it that way
+ */
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+
+{
+ flush_tlb_mm(vma->vm_mm);
+}
+EXPORT_SYMBOL(flush_tlb_range);
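
For what it's worth, the threshold idea from the comment above could look
roughly like the sketch below (untested; PAGE_FLUSH_THRESHOLD and its
value are invented for illustration, the right cutoff depends on how many
tlbivax operations a core can stack before a tlbsync).

/* Small ranges: per-page invalidation.  Large ranges: whole-PID flush. */
#define PAGE_FLUSH_THRESHOLD    32

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                     unsigned long end)
{
        if (((end - start) >> PAGE_SHIFT) > PAGE_FLUSH_THRESHOLD) {
                flush_tlb_mm(vma->vm_mm);
                return;
        }
        for (; start < end; start += PAGE_SIZE)
                flush_tlb_page(vma, start);
}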
[snip]
Index: linux-work/arch/powerpc/include/asm/mmu.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu.h 2008-12-15 14:36:20.000000000 +1100
+++ linux-work/arch/powerpc/include/asm/mmu.h 2008-12-15 14:36:20.000000000 +1100
@@ -15,6 +15,9 @@
#define MMU_FTR_TYPE_FSL_E ASM_CONST(0x00000010)
#define MMU_FTR_HAS_HIGH_BATS ASM_CONST(0x00010000)
#define MMU_FTR_BIG_PHYS ASM_CONST(0x00020000)
+#define MMU_FTR_HAS_TLBIVAX_BCAST ASM_CONST(0x00040000)
+#define MMU_FTR_HAS_TLBILX_PID ASM_CONST(0x00080000)
Can we make these FTR_USE_ instead of FTR_HAS_? On e500 we have
TLBIVAX_BCAST but don't plan to use it. I'd prefer not to have to
answer questions about that.
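
i.e. something along these lines, purely as a naming suggestion:

#define MMU_FTR_USE_TLBIVAX_BCAST ASM_CONST(0x00040000)
#define MMU_FTR_USE_TLBILX_PID ASM_CONST(0x00080000)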
+#define MMU_FTR_TLBIVAX_NEED_LOCK ASM_CONST(0x00100000)
Is this really an ivax lock, or a sync lock?
- k