This badly needs breaking up, and a better changelog... oh well...

The big changes:

* The "ppc64_caches" structure is now "powerpc_caches" and is used on
  both PPC32 and PPC64.  I hated staring at the pages and pages of
  assembly code, so nearly all of the functions are now C with tiny
  snippets of inline ASM in the loops.

* Lots of ugly assembly functions in arch/powerpc/kernel/misc_*.S were
  rewritten as C (with small inline-ASM helpers) in arch/powerpc/mm/cache.c.

* I'm not sure that the physical address functions from those files
  actually came out cleaner, but they are now more correct.

* I'm not 100% sure I like the new FOR_EACH_CACHELINE() macro, but it
  sure does make a lot of the other code much cleaner (see the
  flush_dcache_range() example below).

* I'm tempted to try to merge the 32/64-bit variants of copy_page()
  into a single C function.  A quick test suggests that I can get
  nearly identical output to the 64-bit ASM with very little work; a
  rough sketch follows after this list.

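And here is the kind of merged copy_page() I have been playing with.
This is only a sketch, it is NOT part of this series, and it assumes the
powerpc_caches structure and the dcbz() helper introduced below; the
final version would still need benchmarking against the hand-tuned ASM:

        /* Hypothetical merged copy_page(); not part of this series */
        void copy_page(void *to, void *from)
        {
                unsigned long bytes = powerpc_caches.dcache_block_bytes;
                unsigned long dst = (unsigned long)to;
                unsigned long src = (unsigned long)from;
                unsigned long end = dst + PAGE_SIZE;

                for (; dst < end; dst += bytes, src += bytes) {
                        /* Zero-allocate the destination block so we don't
                         * read it into the cache just to overwrite it. */
                        dcbz(dst);
                        memcpy((void *)dst, (void *)src, bytes);
                }
        }
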
---
 arch/powerpc/include/asm/cache.h             |  155 ++++++++++++---
 arch/powerpc/include/asm/cacheflush.h        |    3 -
 arch/powerpc/include/asm/page.h              |    6 +
 arch/powerpc/include/asm/page_32.h           |    4 +-
 arch/powerpc/include/asm/page_64.h           |   17 --
 arch/powerpc/kernel/align.c                  |    7 +-
 arch/powerpc/kernel/asm-offsets.c            |   13 +-
 arch/powerpc/kernel/head_32.S                |    9 +-
 arch/powerpc/kernel/head_64.S                |    2 +-
 arch/powerpc/kernel/misc_32.S                |  193 ------------------
 arch/powerpc/kernel/misc_64.S                |  182 -----------------
 arch/powerpc/kernel/ppc_ksyms.c              |    3 -
 arch/powerpc/kernel/setup-common.c           |  103 ++++++++++
 arch/powerpc/kernel/setup.h                  |    1 +
 arch/powerpc/kernel/setup_32.c               |   11 +-
 arch/powerpc/kernel/setup_64.c               |  118 +----------
 arch/powerpc/kernel/vdso.c                   |   27 +--
 arch/powerpc/lib/copypage_64.S               |   10 +-
 arch/powerpc/mm/Makefile                     |    2 +-
 arch/powerpc/mm/cache.c                      |  279 ++++++++++++++++++++++++++
 arch/powerpc/mm/dma-noncoherent.c            |    2 +-
 arch/powerpc/platforms/52xx/lite5200_sleep.S |    9 +-
 arch/powerpc/platforms/powermac/pci.c        |    2 +-
 arch/powerpc/xmon/xmon.c                     |   53 +++---
 drivers/macintosh/smu.c                      |    8 +-
 25 files changed, 599 insertions(+), 620 deletions(-)
 create mode 100644 arch/powerpc/mm/cache.c

diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 4b50941..b1dc08f 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -3,47 +3,142 @@
 
 #ifdef __KERNEL__
 
-
-/* bytes per L1 cache line */
-#if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
-#define L1_CACHE_SHIFT         4
-#define MAX_COPY_PREFETCH      1
+/*
+ * Various PowerPC CPUs which are otherwise compatible have different L1
+ * cache line sizes.
+ *
+ * Unfortunately, lots of kernel code assumes that L1_CACHE_BYTES and
+ * L1_CACHE_SHIFT are compile-time constants that can be used to align
+ * data-structures to avoid false cacheline sharing, so we can't just
+ * compute them at runtime from the cputable values.
+ *
+ * So for alignment purposes, we will compute these values as safe maximums
+ * of all the CPU support compiled into the kernel.
+ */
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_47x)
+# define L1_CACHE_SHIFT_MAX 7 /* 128-byte cache blocks */
 #elif defined(CONFIG_PPC_E500MC)
-#define L1_CACHE_SHIFT         6
-#define MAX_COPY_PREFETCH      4
-#elif defined(CONFIG_PPC32)
-#define MAX_COPY_PREFETCH      4
-#if defined(CONFIG_PPC_47x)
-#define L1_CACHE_SHIFT         7
+# define L1_CACHE_SHIFT_MAX 6 /* 64-byte cache blocks */
 #else
-#define L1_CACHE_SHIFT         5
+# define L1_CACHE_SHIFT_MAX 5 /* 32-byte cache blocks */
 #endif
+#define L1_CACHE_BYTES_MAX (1 << L1_CACHE_SHIFT_MAX)
+
+#define L1_CACHE_SHIFT  L1_CACHE_SHIFT_MAX
+#define L1_CACHE_BYTES  L1_CACHE_BYTES_MAX
+#define SMP_CACHE_BYTES L1_CACHE_BYTES_MAX
+
+/*
+ * Unfortunately, for other purposes, we can't just use a safe maximum value
+ * because it gets used in loops when invalidating or clearing cachelines,
+ * and it would be very bad to flush/invalidate/zero only every 4th line.
+ *
+ * During early initialization we load these values from the device-tree and
+ * the cputable into the powerpc_caches structure, but we need to be able to
+ * clear pages before that occurs, so these need sane default values.
+ *
+ * As explained in the powerpc_caches structure definition, the defaults
+ * should be safe minimums, so that's what we compute here.
+ */
+#if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
+# define L1_CACHE_SHIFT_MIN 4 /* 16-byte cache blocks */
+#elif defined(CONFIG_PPC32)
+# define L1_CACHE_SHIFT_MIN 5 /* 32-byte cache blocks */
 #else /* CONFIG_PPC64 */
-#define L1_CACHE_SHIFT         7
+# define L1_CACHE_SHIFT_MIN 6 /* 64-byte cache blocks */
 #endif
+#define L1_CACHE_BYTES_MIN (1 << L1_CACHE_SHIFT_MIN)
 
-#define        L1_CACHE_BYTES          (1 << L1_CACHE_SHIFT)
+/*
+ * Apparently the 8xx and the 403GCX have tiny caches, so they never prefetch
+ * more than a single cacheline in the ASM memory copy functions.
+ *
+ * All other 32-bit CPUs prefetch 4 cachelines, and the 64-bit CPUs have
+ * their own copy routines which prefetch the entire page.
+ */
+#ifdef CONFIG_PPC32
+# if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
+#  define MAX_COPY_PREFETCH 1
+# else
+#  define MAX_COPY_PREFETCH 4
+# endif
+#endif
 
-#define        SMP_CACHE_BYTES         L1_CACHE_BYTES
+#ifndef __ASSEMBLY__
 
-#if defined(__powerpc64__) && !defined(__ASSEMBLY__)
-struct ppc64_caches {
-       u32     dsize;                  /* L1 d-cache size */
-       u32     dline_size;             /* L1 d-cache line size */
-       u32     log_dline_size;
-       u32     dlines_per_page;
-       u32     isize;                  /* L1 i-cache size */
-       u32     iline_size;             /* L1 i-cache line size */
-       u32     log_iline_size;
-       u32     ilines_per_page;
-};
+/*
+ * A handy macro to iterate over all the cachelines covering memory from
+ * "START" through "STOP - 1", inclusive.  LINE must be an unsigned long lvalue.
+ */
+#define FOR_EACH_CACHELINE(LINE, START, STOP, CACHE)                    \
+       for ((LINE) = (START) &                                          \
+                     ~(unsigned long)(powerpc_caches.CACHE##_block_bytes - 1); \
+            (LINE) < (STOP); (LINE) += powerpc_caches.CACHE##_block_bytes)
+
+/* Write out a data cache block if it is dirty */
+static inline void dcbst(unsigned long addr)
+{
+       asm volatile("dcbst %y0" :: "Z"(*(u8 *)addr) : "memory");
+}
 
-extern struct ppc64_caches ppc64_caches;
-#endif /* __powerpc64__ && ! __ASSEMBLY__ */
+/* Invalidate a data cache block (will lose data if dirty!) */
+static inline void dcbi(unsigned long addr)
+{
+       asm volatile("dcbi %y0" :: "Z"(*(u8 *)addr) : "memory");
+}
+
+/* Write out (if dirty) and invalidate a data cache block */
+static inline void dcbf(unsigned long addr)
+{
+       asm volatile("dcbf %y0" :: "Z"(*(u8 *)addr) : "memory");
+}
+
+/* Populate a data cache block with zeros */
+static inline void dcbz(unsigned long addr)
+{
+       asm volatile("dcbz %y0" :: "Z"(*(u8 *)addr) : "memory");
+}
+
+/* Invalidate an instruction cache block */
+static inline void icbi(unsigned long addr)
+{
+       asm volatile("icbi %y0" :: "Z"(*(u8 *)addr) : "memory");
+}
+
+/*
+ * This structure contains the various PowerPC cache parameters computed
+ * shortly after the device-tree has been unflattened during boot.
+ *
+ * Prior to that they have statically initialized values from L1_CACHE_*_MIN
+ * computed above.
+ *
+ * NOTE: If the dcache/icache are separate then ucache_* should be zeroed,
+ *       otherwise dcache == icache == ucache.
+ */
+struct powerpc_caches {
+       /* Data cache parameters */
+       u32 dcache_total_bytes;
+       u32 dcache_block_bytes;
+       u32 dcache_block_shift;
+       u32 dcache_blocks_per_page;
+
+       /* Instruction cache parameters */
+       u32 icache_total_bytes;
+       u32 icache_block_bytes;
+       u32 icache_block_shift;
+       u32 icache_blocks_per_page;
+
+       /* Unified cache parameters (If != 0, all 3 caches must be equal) */
+       u32 ucache_total_bytes;
+       u32 ucache_block_bytes;
+       u32 ucache_block_shift;
+       u32 ucache_blocks_per_page;
+};
+extern struct powerpc_caches powerpc_caches;
 
-#if !defined(__ASSEMBLY__)
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
-#endif
+
+#endif /* not __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_CACHE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index ab9e402..8646443 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -47,12 +47,9 @@ extern void __flush_dcache_icache_phys(unsigned long physaddr);
 #endif /* CONFIG_PPC32 && !CONFIG_BOOKE */
 
 extern void flush_dcache_range(unsigned long start, unsigned long stop);
-#ifdef CONFIG_PPC32
 extern void clean_dcache_range(unsigned long start, unsigned long stop);
 extern void invalidate_dcache_range(unsigned long start, unsigned long stop);
-#endif /* CONFIG_PPC32 */
 #ifdef CONFIG_PPC64
-extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
 extern void flush_dcache_phys_range(unsigned long start, unsigned long stop);
 #endif
 
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index dd9c4fd..b2e24ce 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -286,11 +286,17 @@ static inline int hugepd_ok(hugepd_t hpd)
 #endif /* CONFIG_HUGETLB_PAGE */
 
 struct page;
+extern void clear_pages(void *page, int order);
 extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
 extern void copy_user_page(void *to, void *from, unsigned long vaddr,
                struct page *p);
 extern int page_is_ram(unsigned long pfn);
 
+static inline void clear_page(void *page)
+{
+       clear_pages(page, 0);
+}
+
 #ifdef CONFIG_PPC_SMLPAR
 void arch_free_page(struct page *page, int order);
 #define HAVE_ARCH_FREE_PAGE
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index 68d73b2..12ae694 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -10,7 +10,7 @@
 #define VM_DATA_DEFAULT_FLAGS  VM_DATA_DEFAULT_FLAGS32
 
 #ifdef CONFIG_NOT_COHERENT_CACHE
-#define ARCH_DMA_MINALIGN      L1_CACHE_BYTES
+#define ARCH_DMA_MINALIGN      L1_CACHE_BYTES_MAX
 #endif
 
 #ifdef CONFIG_PTE_64BIT
@@ -37,8 +37,6 @@ typedef unsigned long pte_basic_t;
 #endif
 
 struct page;
-extern void clear_pages(void *page, int order);
-static inline void clear_page(void *page) { clear_pages(page, 0); }
 extern void copy_page(void *to, void *from);
 
 #include <asm-generic/getorder.h>
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index fb40ede..7e156f6 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -42,23 +42,6 @@
 
 typedef unsigned long pte_basic_t;
 
-static __inline__ void clear_page(void *addr)
-{
-       unsigned long lines, line_size;
-
-       line_size = ppc64_caches.dline_size;
-       lines = ppc64_caches.dlines_per_page;
-
-       __asm__ __volatile__(
-       "mtctr  %1      # clear_page\n\
-1:      dcbz   0,%0\n\
-       add     %0,%0,%3\n\
-       bdnz+   1b"
-        : "=r" (addr)
-        : "r" (lines), "0" (addr), "r" (line_size)
-       : "ctr", "memory");
-}
-
 extern void copy_page(void *to, void *from);
 
 /* Log 2 of page table size */
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 8184ee9..debfb99 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -233,14 +233,9 @@ static inline unsigned make_dsisr(unsigned instr)
  */
 static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
 {
+       int i, size = powerpc_caches.dcache_block_bytes;
        long __user *p;
-       int i, size;
 
-#ifdef __powerpc64__
-       size = ppc64_caches.dline_size;
-#else
-       size = L1_CACHE_BYTES;
-#endif
        p = (long __user *) (regs->dar & -size);
        if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
                return -EFAULT;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 7c5324f..505b25a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -126,13 +126,14 @@ int main(void)
        DEFINE(TI_TASK, offsetof(struct thread_info, task));
        DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 
+       DEFINE(DCACHE_BLOCK_SHIFT,      offsetof(struct powerpc_caches, dcache_block_shift));
+       DEFINE(DCACHE_BLOCK_BYTES,      offsetof(struct powerpc_caches, dcache_block_bytes));
+       DEFINE(DCACHE_BLOCKS_PER_PAGE,  offsetof(struct powerpc_caches, dcache_blocks_per_page));
+       DEFINE(ICACHE_BLOCK_SHIFT,      offsetof(struct powerpc_caches, icache_block_shift));
+       DEFINE(ICACHE_BLOCK_BYTES,      offsetof(struct powerpc_caches, icache_block_bytes));
+       DEFINE(ICACHE_BLOCKS_PER_PAGE,  offsetof(struct powerpc_caches, icache_blocks_per_page));
+
 #ifdef CONFIG_PPC64
-       DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
-       DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size));
-       DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page));
-       DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
-       DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
-       DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
        /* paca */
        DEFINE(PACA_SIZE, sizeof(struct paca_struct));
        DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token));
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 0654dba..8abc44a 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -786,7 +786,14 @@ relocate_kernel:
 _ENTRY(copy_and_flush)
        addi    r5,r5,-4
        addi    r6,r6,-4
-4:     li      r0,L1_CACHE_BYTES/4
+4:     li      r0,L1_CACHE_BYTES_MIN/4 /* Use the smallest common      */
+                                       /* denominator cache line       */
+                                       /* size.  This results in       */
+                                       /* extra cache line flushes     */
+                                       /* but operation is correct.    */
+                                       /* Can't get cache line size    */
+                                       /* from device-tree yet         */
+
        mtctr   r0
 3:     addi    r6,r6,4                 /* copy a cache line */
        lwzx    r0,r6,r4
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 06c7251..183d371 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -480,7 +480,7 @@ p_end:      .llong  _end - _stext
 _GLOBAL(copy_and_flush)
        addi    r5,r5,-8
        addi    r6,r6,-8
-4:     li      r0,8                    /* Use the smallest common      */
+4:     li      r0,L1_CACHE_BYTES_MIN/8 /* Use the smallest common      */
                                        /* denominator cache line       */
                                        /* size.  This results in       */
                                        /* extra cache line flushes     */
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index f7d760a..ee61600 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -321,199 +321,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
        blr
 
 /*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- * This is a no-op on the 601.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- */
-_KPROBE(__flush_icache_range)
-BEGIN_FTR_SECTION
-       blr                             /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-       li      r5,L1_CACHE_BYTES-1
-       andc    r3,r3,r5
-       subf    r4,r3,r4
-       add     r4,r4,r5
-       srwi.   r4,r4,L1_CACHE_SHIFT
-       beqlr
-       mtctr   r4
-       mr      r6,r3
-1:     dcbst   0,r3
-       addi    r3,r3,L1_CACHE_BYTES
-       bdnz    1b
-       sync                            /* wait for dcbst's to get to ram */
-#ifndef CONFIG_44x
-       mtctr   r4
-2:     icbi    0,r6
-       addi    r6,r6,L1_CACHE_BYTES
-       bdnz    2b
-#else
-       /* Flash invalidate on 44x because we are passed kmapped addresses and
-          this doesn't work for userspace pages due to the virtually tagged
-          icache.  Sigh. */
-       iccci   0, r0
-#endif
-       sync                            /* additional sync needed on g4 */
-       isync
-       blr
-/*
- * Write any modified data cache blocks out to memory.
- * Does not invalidate the corresponding cache lines (especially for
- * any corresponding instruction cache).
- *
- * clean_dcache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(clean_dcache_range)
-       li      r5,L1_CACHE_BYTES-1
-       andc    r3,r3,r5
-       subf    r4,r3,r4
-       add     r4,r4,r5
-       srwi.   r4,r4,L1_CACHE_SHIFT
-       beqlr
-       mtctr   r4
-
-1:     dcbst   0,r3
-       addi    r3,r3,L1_CACHE_BYTES
-       bdnz    1b
-       sync                            /* wait for dcbst's to get to ram */
-       blr
-
-/*
- * Write any modified data cache blocks out to memory and invalidate them.
- * Does not invalidate the corresponding instruction cache blocks.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(flush_dcache_range)
-       li      r5,L1_CACHE_BYTES-1
-       andc    r3,r3,r5
-       subf    r4,r3,r4
-       add     r4,r4,r5
-       srwi.   r4,r4,L1_CACHE_SHIFT
-       beqlr
-       mtctr   r4
-
-1:     dcbf    0,r3
-       addi    r3,r3,L1_CACHE_BYTES
-       bdnz    1b
-       sync                            /* wait for dcbst's to get to ram */
-       blr
-
-/*
- * Like above, but invalidate the D-cache.  This is used by the 8xx
- * to invalidate the cache so the PPC core doesn't get stale data
- * from the CPM (no cache snooping here :-).
- *
- * invalidate_dcache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(invalidate_dcache_range)
-       li      r5,L1_CACHE_BYTES-1
-       andc    r3,r3,r5
-       subf    r4,r3,r4
-       add     r4,r4,r5
-       srwi.   r4,r4,L1_CACHE_SHIFT
-       beqlr
-       mtctr   r4
-
-1:     dcbi    0,r3
-       addi    r3,r3,L1_CACHE_BYTES
-       bdnz    1b
-       sync                            /* wait for dcbi's to get to ram */
-       blr
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- * This is a no-op on the 601 which has a unified cache.
- *
- *     void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-BEGIN_FTR_SECTION
-       blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-       rlwinm  r3,r3,0,0,31-PAGE_SHIFT         /* Get page base address */
-       li      r4,PAGE_SIZE/L1_CACHE_BYTES     /* Number of lines in a page */
-       mtctr   r4
-       mr      r6,r3
-0:     dcbst   0,r3                            /* Write line to ram */
-       addi    r3,r3,L1_CACHE_BYTES
-       bdnz    0b
-       sync
-#ifdef CONFIG_44x
-       /* We don't flush the icache on 44x. Those have a virtual icache
-        * and we don't have access to the virtual address here (it's
-        * not the page vaddr but where it's mapped in user space). The
-        * flushing of the icache on these is handled elsewhere, when
-        * a change in the address space occurs, before returning to
-        * user space
-        */
-BEGIN_MMU_FTR_SECTION
-       blr
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
-#endif /* CONFIG_44x */
-       mtctr   r4
-1:     icbi    0,r6
-       addi    r6,r6,L1_CACHE_BYTES
-       bdnz    1b
-       sync
-       isync
-       blr
-
-#ifndef CONFIG_BOOKE
-/*
- * Flush a particular page from the data cache to RAM, identified
- * by its physical address.  We turn off the MMU so we can just use
- * the physical address (this may be a highmem page without a kernel
- * mapping).
- *
- *     void __flush_dcache_icache_phys(unsigned long physaddr)
- */
-_GLOBAL(__flush_dcache_icache_phys)
-BEGIN_FTR_SECTION
-       blr                                     /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-       mfmsr   r10
-       rlwinm  r0,r10,0,28,26                  /* clear DR */
-       mtmsr   r0
-       isync
-       rlwinm  r3,r3,0,0,31-PAGE_SHIFT         /* Get page base address */
-       li      r4,PAGE_SIZE/L1_CACHE_BYTES     /* Number of lines in a page */
-       mtctr   r4
-       mr      r6,r3
-0:     dcbst   0,r3                            /* Write line to ram */
-       addi    r3,r3,L1_CACHE_BYTES
-       bdnz    0b
-       sync
-       mtctr   r4
-1:     icbi    0,r6
-       addi    r6,r6,L1_CACHE_BYTES
-       bdnz    1b
-       sync
-       mtmsr   r10                             /* restore DR */
-       isync
-       blr
-#endif /* CONFIG_BOOKE */
-
-/*
- * Clear pages using the dcbz instruction, which doesn't cause any
- * memory traffic (except to write out any cache lines which get
- * displaced).  This only works on cacheable memory.
- *
- * void clear_pages(void *page, int order) ;
- */
-_GLOBAL(clear_pages)
-       li      r0,PAGE_SIZE/L1_CACHE_BYTES
-       slw     r0,r0,r4
-       mtctr   r0
-1:     dcbz    0,r3
-       addi    r3,r3,L1_CACHE_BYTES
-       bdnz    1b
-       blr
-
-/*
  * Copy a whole page.  We use the dcbz instruction on the destination
  * to reduce memory traffic (it eliminates the unnecessary reads of
  * the destination into cache).  This requires that the destination
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 616921e..500fd61 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -53,188 +53,6 @@ _GLOBAL(call_handle_irq)
        mtlr    r0
        blr
 
-       .section        ".toc","aw"
-PPC64_CACHES:
-       .tc             ppc64_caches[TC],ppc64_caches
-       .section        ".text"
-
-/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- *
- *   flush all bytes from start through stop-1 inclusive
- */
-
-_KPROBE(__flush_icache_range)
-
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- * and in some cases i-cache and d-cache line sizes differ from
- * each other.
- */
-       ld      r10,PPC64_CACHES@toc(r2)
-       lwz     r7,DCACHEL1LINESIZE(r10)/* Get cache line size */
-       addi    r5,r7,-1
-       andc    r6,r3,r5                /* round low to line bdy */
-       subf    r8,r6,r4                /* compute length */
-       add     r8,r8,r5                /* ensure we get enough */
-       lwz     r9,DCACHEL1LOGLINESIZE(r10)     /* Get log-2 of cache line size */
-       srw.    r8,r8,r9                /* compute line count */
-       beqlr                           /* nothing to do? */
-       mtctr   r8
-1:     dcbst   0,r6
-       add     r6,r6,r7
-       bdnz    1b
-       sync
-
-/* Now invalidate the instruction cache */
-       
-       lwz     r7,ICACHEL1LINESIZE(r10)        /* Get Icache line size */
-       addi    r5,r7,-1
-       andc    r6,r3,r5                /* round low to line bdy */
-       subf    r8,r6,r4                /* compute length */
-       add     r8,r8,r5
-       lwz     r9,ICACHEL1LOGLINESIZE(r10)     /* Get log-2 of Icache line size */
-       srw.    r8,r8,r9                /* compute line count */
-       beqlr                           /* nothing to do? */
-       mtctr   r8
-2:     icbi    0,r6
-       add     r6,r6,r7
-       bdnz    2b
-       isync
-       blr
-       .previous .text
-/*
- * Like above, but only do the D-cache.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- *
- *    flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_range)
-
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- */
-       ld      r10,PPC64_CACHES@toc(r2)
-       lwz     r7,DCACHEL1LINESIZE(r10)        /* Get dcache line size */
-       addi    r5,r7,-1
-       andc    r6,r3,r5                /* round low to line bdy */
-       subf    r8,r6,r4                /* compute length */
-       add     r8,r8,r5                /* ensure we get enough */
-       lwz     r9,DCACHEL1LOGLINESIZE(r10)     /* Get log-2 of dcache line size */
-       srw.    r8,r8,r9                /* compute line count */
-       beqlr                           /* nothing to do? */
-       mtctr   r8
-0:     dcbst   0,r6
-       add     r6,r6,r7
-       bdnz    0b
-       sync
-       blr
-
-/*
- * Like above, but works on non-mapped physical addresses.
- * Use only for non-LPAR setups ! It also assumes real mode
- * is cacheable. Used for flushing out the DART before using
- * it as uncacheable memory 
- *
- * flush_dcache_phys_range(unsigned long start, unsigned long stop)
- *
- *    flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_phys_range)
-       ld      r10,PPC64_CACHES@toc(r2)
-       lwz     r7,DCACHEL1LINESIZE(r10)        /* Get dcache line size */
-       addi    r5,r7,-1
-       andc    r6,r3,r5                /* round low to line bdy */
-       subf    r8,r6,r4                /* compute length */
-       add     r8,r8,r5                /* ensure we get enough */
-       lwz     r9,DCACHEL1LOGLINESIZE(r10)     /* Get log-2 of dcache line size */
-       srw.    r8,r8,r9                /* compute line count */
-       beqlr                           /* nothing to do? */
-       mfmsr   r5                      /* Disable MMU Data Relocation */
-       ori     r0,r5,MSR_DR
-       xori    r0,r0,MSR_DR
-       sync
-       mtmsr   r0
-       sync
-       isync
-       mtctr   r8
-0:     dcbst   0,r6
-       add     r6,r6,r7
-       bdnz    0b
-       sync
-       isync
-       mtmsr   r5                      /* Re-enable MMU Data Relocation */
-       sync
-       isync
-       blr
-
-_GLOBAL(flush_inval_dcache_range)
-       ld      r10,PPC64_CACHES@toc(r2)
-       lwz     r7,DCACHEL1LINESIZE(r10)        /* Get dcache line size */
-       addi    r5,r7,-1
-       andc    r6,r3,r5                /* round low to line bdy */
-       subf    r8,r6,r4                /* compute length */
-       add     r8,r8,r5                /* ensure we get enough */
-       lwz     r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */
-       srw.    r8,r8,r9                /* compute line count */
-       beqlr                           /* nothing to do? */
-       sync
-       isync
-       mtctr   r8
-0:     dcbf    0,r6
-       add     r6,r6,r7
-       bdnz    0b
-       sync
-       isync
-       blr
-
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- *
- *     void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- */
-
-/* Flush the dcache */
-       ld      r7,PPC64_CACHES@toc(r2)
-       clrrdi  r3,r3,PAGE_SHIFT                    /* Page align */
-       lwz     r4,DCACHEL1LINESPERPAGE(r7)     /* Get # dcache lines per page */
-       lwz     r5,DCACHEL1LINESIZE(r7)         /* Get dcache line size */
-       mr      r6,r3
-       mtctr   r4
-0:     dcbst   0,r6
-       add     r6,r6,r5
-       bdnz    0b
-       sync
-
-/* Now invalidate the icache */        
-
-       lwz     r4,ICACHEL1LINESPERPAGE(r7)     /* Get # icache lines per page */
-       lwz     r5,ICACHEL1LINESIZE(r7)         /* Get icache line size */
-       mtctr   r4
-1:     icbi    0,r3
-       add     r3,r3,r5
-       bdnz    1b
-       isync
-       blr
-
-
 #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
 /*
  * Do an IO access in real mode
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index acba8ce..ccdceb7 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -53,7 +53,6 @@ extern void program_check_exception(struct pt_regs *regs);
 extern void single_step_exception(struct pt_regs *regs);
 extern int sys_sigreturn(struct pt_regs *regs);
 
-EXPORT_SYMBOL(clear_pages);
 EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
 EXPORT_SYMBOL(DMA_MODE_READ);
 EXPORT_SYMBOL(DMA_MODE_WRITE);
@@ -113,8 +112,6 @@ EXPORT_SYMBOL(giveup_spe);
 #ifndef CONFIG_PPC64
 EXPORT_SYMBOL(flush_instruction_cache);
 #endif
-EXPORT_SYMBOL(__flush_icache_range);
-EXPORT_SYMBOL(flush_dcache_range);
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_PPC32
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 77bb77d..3abfea4 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -83,6 +83,54 @@ unsigned long klimit = (unsigned long) _end;
 char cmd_line[COMMAND_LINE_SIZE];
 
 /*
+ * Initialize these values to minimum safe defaults in case they need to be
+ * used early during the boot process.  While this may not seem safe, it is
+ * actually safe in practice, because all of the kernel loops that use this
+ * data operate on whole pages.
+ *
+ * The PowerPC Book III-E spec documents that the page size is an integer
+ * multiple of the cache block size, so a cache block never straddles a
+ * page boundary.
+ *
+ * So, for example, when clearing a whole page there are only two things that
+ * can be done wrong with "dcbz":
+ *
+ *   (1) Call "dcbz" with an address outside the page you want to zero.
+ *
+ *   (2) Call "dcbz" too few times to actually hit all of the cachelines,
+ *       i.e. use too large a cacheline stride.
+ *
+ * So as long as we ensure that this number is small enough for the current
+ * CPU everything will operate correctly, albeit with a slight performance
+ * hit, until we get a chance to parse the device-tree for the right value.
+ *
+ * NOTE: Userspace expects an exact value, so none of the above applies after
+ * the device tree has been unflattened and actual values computed.
+ *
+ * See arch/powerpc/include/asm/cache.h for more information.
+ */
+struct powerpc_caches powerpc_caches = {
+       /* Data cache sizes */
+       .dcache_total_bytes  = 0, /* Unknown */
+       .dcache_block_bytes = L1_CACHE_BYTES_MIN,
+       .dcache_block_shift = L1_CACHE_SHIFT_MIN,
+       .dcache_blocks_per_page = (PAGE_SIZE >> L1_CACHE_SHIFT_MIN),
+
+       /* Instruction cache sizes */
+       .icache_total_bytes = 0,
+       .icache_block_bytes = L1_CACHE_BYTES_MIN,
+       .icache_block_shift = L1_CACHE_SHIFT_MIN,
+       .icache_blocks_per_page = (PAGE_SIZE >> L1_CACHE_SHIFT_MIN),
+
+       /* Unified cache (assume cache is split by default) */
+       .ucache_total_bytes = 0,
+       .ucache_block_bytes = 0,
+       .ucache_block_shift = 0,
+       .ucache_blocks_per_page = 0,
+};
+EXPORT_SYMBOL_GPL(powerpc_caches);
+
+/*
  * This still seems to be needed... -- paulus
  */ 
 struct screen_info screen_info = {
@@ -349,6 +397,61 @@ const struct seq_operations cpuinfo_op = {
        .show = show_cpuinfo,
 };
 
+/* Helper functions to compute various values from a cache block size */
+static void __init set_dcache_block_data(u32 bytes)
+{
+       u32 shift = __ilog2(bytes);
+       powerpc_caches.dcache_block_bytes = bytes;
+       powerpc_caches.dcache_block_shift = shift;
+       powerpc_caches.dcache_blocks_per_page = (PAGE_SIZE >> shift);
+}
+static void __init set_icache_block_data(u32 bytes)
+{
+       u32 shift = __ilog2(bytes);
+       powerpc_caches.icache_block_bytes = bytes;
+       powerpc_caches.icache_block_shift = shift;
+       powerpc_caches.icache_blocks_per_page = (PAGE_SIZE >> shift);
+}
+
+/*
+ * Preinitialize the powerpc_caches structure from the cputable.  We will
+ * later scan the device-tree for this information, which may be more
+ * accurate.
+ */
+void __init initialize_early_cache_info(void)
+{
+       set_dcache_block_data(cur_cpu_spec->dcache_bsize);
+       set_icache_block_data(cur_cpu_spec->icache_bsize);
+}
+
+/*
+ * Initialize the powerpc_caches structure from the device-tree for use by
+ * copy_page(), cache flush routines, and the AT_DCACHEBSIZE aux vector entry.
+ *
+ * In the unlikely event that the device-tree doesn't have this information,
+ * the defaults loaded by initialize_early_cache_info() from the cputable
+ * will be used.
+ */
+void __init initialize_cache_info(void)
+{
+       /* Assume that the cache properties are the same across all nodes */
+       struct device_node *np = of_find_node_by_type(NULL, "cpu");
+       u32 value = 0;
+
+       /* First check data/instruction cache block sizes */
+       if (    !of_property_read_u32(np, "d-cache-block-size", &value) ||
+               !of_property_read_u32(np, "d-cache-line-size", &value))
+               set_dcache_block_data(value);
+
+       if (    !of_property_read_u32(np, "i-cache-block-size", &value) ||
+               !of_property_read_u32(np, "i-cache-line-size", &value))
+               set_icache_block_data(value);
+
+       /* Also read total cache sizes (no defaults here) */
+       of_property_read_u32(np, "d-cache-size", &powerpc_caches.dcache_total_bytes);
+       of_property_read_u32(np, "i-cache-size", &powerpc_caches.icache_total_bytes);
+}
+
 void __init check_for_initrd(void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 4c67ad7..1ae16ec 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -1,6 +1,7 @@
 #ifndef _POWERPC_KERNEL_SETUP_H
 #define _POWERPC_KERNEL_SETUP_H
 
+void initialize_cache_info(void);
 void check_for_initrd(void);
 void do_init_bootmem(void);
 void setup_panic(void);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index c1ce863..1db2bfb 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -63,14 +63,6 @@ EXPORT_SYMBOL(vgacon_remap_base);
 #endif
 
 /*
- * These are used in binfmt_elf.c to put aux entries on the stack
- * for each elf executable being started.
- */
-int dcache_bsize;
-int icache_bsize;
-int ucache_bsize;
-
-/*
  * We're called here very early in the boot.  We determine the machine
  * type and call the appropriate low-level setup functions.
  *  -- Cort <c...@fsmlabs.com>
@@ -286,10 +278,13 @@ void __init setup_arch(char **cmdline_p)
 {
        *cmdline_p = cmd_line;
 
+       initialize_early_cache_info();
+
        /* so udelay does something sensible, assume <= 1000 bogomips */
        loops_per_jiffy = 500000000 / HZ;
 
        unflatten_device_tree();
+       initialize_cache_info();
        check_for_initrd();
 
        if (ppc_md.init_early)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 1a9dea8..bb686de 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -77,25 +77,6 @@ int boot_cpuid = 0;
 int __initdata spinning_secondaries;
 u64 ppc64_pft_size;
 
-/* Pick defaults since we might want to patch instructions
- * before we've read this from the device tree.
- */
-struct ppc64_caches ppc64_caches = {
-       .dline_size = 0x40,
-       .log_dline_size = 6,
-       .iline_size = 0x40,
-       .log_iline_size = 6
-};
-EXPORT_SYMBOL_GPL(ppc64_caches);
-
-/*
- * These are used in binfmt_elf.c to put aux entries on the stack
- * for each elf executable being started.
- */
-int dcache_bsize;
-int icache_bsize;
-int ucache_bsize;
-
 #ifdef CONFIG_SMP
 
 static char *smt_enabled_cmdline;
@@ -265,82 +246,6 @@ void smp_release_cpus(void)
 #endif /* CONFIG_SMP || CONFIG_KEXEC */
 
 /*
- * Initialize some remaining members of the ppc64_caches and systemcfg
- * structures
- * (at least until we get rid of them completely). This is mostly some
- * cache informations about the CPU that will be used by cache flush
- * routines and/or provided to userland
- */
-static void __init initialize_cache_info(void)
-{
-       struct device_node *np;
-       unsigned long num_cpus = 0;
-
-       DBG(" -> initialize_cache_info()\n");
-
-       for_each_node_by_type(np, "cpu") {
-               num_cpus += 1;
-
-               /*
-                * We're assuming *all* of the CPUs have the same
-                * d-cache and i-cache sizes... -Peter
-                */
-               if (num_cpus == 1) {
-                       const u32 *sizep, *lsizep;
-                       u32 size, lsize;
-
-                       size = 0;
-                       lsize = cur_cpu_spec->dcache_bsize;
-                       sizep = of_get_property(np, "d-cache-size", NULL);
-                       if (sizep != NULL)
-                               size = *sizep;
-                       lsizep = of_get_property(np, "d-cache-block-size",
-                                                NULL);
-                       /* fallback if block size missing */
-                       if (lsizep == NULL)
-                               lsizep = of_get_property(np,
-                                                        "d-cache-line-size",
-                                                        NULL);
-                       if (lsizep != NULL)
-                               lsize = *lsizep;
-                       if (sizep == 0 || lsizep == 0)
-                               DBG("Argh, can't find dcache properties ! "
-                                   "sizep: %p, lsizep: %p\n", sizep, lsizep);
-
-                       ppc64_caches.dsize = size;
-                       ppc64_caches.dline_size = lsize;
-                       ppc64_caches.log_dline_size = __ilog2(lsize);
-                       ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
-
-                       size = 0;
-                       lsize = cur_cpu_spec->icache_bsize;
-                       sizep = of_get_property(np, "i-cache-size", NULL);
-                       if (sizep != NULL)
-                               size = *sizep;
-                       lsizep = of_get_property(np, "i-cache-block-size",
-                                                NULL);
-                       if (lsizep == NULL)
-                               lsizep = of_get_property(np,
-                                                        "i-cache-line-size",
-                                                        NULL);
-                       if (lsizep != NULL)
-                               lsize = *lsizep;
-                       if (sizep == 0 || lsizep == 0)
-                               DBG("Argh, can't find icache properties ! "
-                                   "sizep: %p, lsizep: %p\n", sizep, lsizep);
-
-                       ppc64_caches.isize = size;
-                       ppc64_caches.iline_size = lsize;
-                       ppc64_caches.log_iline_size = __ilog2(lsize);
-                       ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
-               }
-       }
-
-       DBG(" <- initialize_cache_info()\n");
-}
-
-
-/*
  * Do some initial setup of the system.  The parameters are those which 
  * were passed in from the bootloader.
  */
@@ -365,10 +270,7 @@ void __init setup_system(void)
         */
        unflatten_device_tree();
 
-       /*
-        * Fill the ppc64_caches & systemcfg structures with informations
-        * retrieved from the device-tree.
-        */
+       /* Fill the powerpc_caches structure with device-tree data */
        initialize_cache_info();
 
 #ifdef CONFIG_PPC_RTAS
@@ -423,12 +325,10 @@ void __init setup_system(void)
        printk("-----------------------------------------------------\n");
        printk("ppc64_pft_size                = 0x%llx\n", ppc64_pft_size);
        printk("physicalMemorySize            = 0x%llx\n", memblock_phys_mem_size());
-       if (ppc64_caches.dline_size != 0x80)
-               printk("ppc64_caches.dcache_line_size = 0x%x\n",
-                      ppc64_caches.dline_size);
-       if (ppc64_caches.iline_size != 0x80)
-               printk("ppc64_caches.icache_line_size = 0x%x\n",
-                      ppc64_caches.iline_size);
+       if (powerpc_caches.dcache_block_bytes != 0x80)
+               printk("dcache_block_bytes = 0x%x\n", powerpc_caches.dcache_block_bytes);
+       if (powerpc_caches.icache_block_bytes != 0x80)
+               printk("icache_block_bytes = 0x%x\n", powerpc_caches.icache_block_bytes);
 #ifdef CONFIG_PPC_STD_MMU_64
        if (htab_address)
                printk("htab_address                  = 0x%p\n", htab_address);
@@ -545,13 +445,7 @@ void __init setup_arch(char **cmdline_p)
 
        *cmdline_p = cmd_line;
 
-       /*
-        * Set cache line size based on type of cpu as a default.
-        * Systems with OF can look in the properties on the cpu node(s)
-        * for a possibly more accurate value.
-        */
-       dcache_bsize = ppc64_caches.dline_size;
-       icache_bsize = ppc64_caches.iline_size;
+       initialize_early_cache_info();
 
        /* reboot on panic */
        panic_timeout = 180;
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 7d14bb6..4a038fb 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -726,6 +726,7 @@ static int __init vdso_init(void)
        vdso_data->version.major = SYSTEMCFG_MAJOR;
        vdso_data->version.minor = SYSTEMCFG_MINOR;
        vdso_data->processor = mfspr(SPRN_PVR);
+
        /*
         * Fake the old platform number for pSeries and iSeries and add
         * in LPAR bit if necessary
@@ -734,29 +735,25 @@ static int __init vdso_init(void)
        if (firmware_has_feature(FW_FEATURE_LPAR))
                vdso_data->platform |= 1;
        vdso_data->physicalMemorySize = memblock_phys_mem_size();
-       vdso_data->dcache_size = ppc64_caches.dsize;
-       vdso_data->dcache_line_size = ppc64_caches.dline_size;
-       vdso_data->icache_size = ppc64_caches.isize;
-       vdso_data->icache_line_size = ppc64_caches.iline_size;
 
-       /* XXXOJN: Blocks should be added to ppc64_caches and used instead */
-       vdso_data->dcache_block_size = ppc64_caches.dline_size;
-       vdso_data->icache_block_size = ppc64_caches.iline_size;
-       vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size;
-       vdso_data->icache_log_block_size = ppc64_caches.log_iline_size;
+       /* There are more cache parameters saved for 64-bit than 32-bit */
+       vdso_data->dcache_size           = powerpc_caches.dcache_total_bytes;
+       vdso_data->icache_size           = powerpc_caches.icache_total_bytes;
+       vdso_data->dcache_line_size      = powerpc_caches.dcache_block_bytes;
+       vdso_data->icache_line_size      = powerpc_caches.icache_block_bytes;
 
        /*
         * Calculate the size of the 64 bits vDSO
         */
        vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
        DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
-#else
-       vdso_data->dcache_block_size = L1_CACHE_BYTES;
-       vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
-       vdso_data->icache_block_size = L1_CACHE_BYTES;
-       vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
-#endif /* CONFIG_PPC64 */
+#endif
 
+       /* Save the cache-block sizes for the VDSO */
+       vdso_data->dcache_block_size     = powerpc_caches.dcache_block_bytes;
+       vdso_data->icache_block_size     = powerpc_caches.icache_block_bytes;
+       vdso_data->dcache_log_block_size = powerpc_caches.dcache_block_shift;
+       vdso_data->icache_log_block_size = powerpc_caches.icache_block_shift;
 
        /*
         * Calculate the size of the 32 bits vDSO
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 53dcb6b..c466977 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -12,17 +12,17 @@
 #include <asm/asm-offsets.h>
 
         .section        ".toc","aw"
-PPC64_CACHES:
-        .tc             ppc64_caches[TC],ppc64_caches
+POWERPC_CACHES:
+        .tc             powerpc_caches[TC],powerpc_caches
         .section        ".text"
 
 _GLOBAL(copy_page)
        lis     r5,PAGE_SIZE@h
        ori     r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
-       ld      r10,PPC64_CACHES@toc(r2)
-       lwz     r11,DCACHEL1LOGLINESIZE(r10)    /* log2 of cache line size */
-       lwz     r12,DCACHEL1LINESIZE(r10)       /* get cache line size */
+       ld      r10,POWERPC_CACHES@toc(r2)
+       lwz     r11,DCACHE_BLOCK_SHIFT(r10)     /* log2 of cache line size */
+       lwz     r12,DCACHE_BLOCK_BYTES(r10)     /* get cache line size */
        li      r9,0
        srd     r8,r5,r11
 
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 991ee81..8ad36a9 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 ccflags-$(CONFIG_PPC64)        := -mno-minimal-toc
 
-obj-y                          := fault.o mem.o pgtable.o gup.o \
+obj-y                          := cache.o fault.o mem.o pgtable.o gup.o \
                                   init_$(CONFIG_WORD_SIZE).o \
                                   pgtable_$(CONFIG_WORD_SIZE).o
 obj-$(CONFIG_PPC_MMU_NOHASH)   += mmu_context_nohash.o tlb_nohash.o \
diff --git a/arch/powerpc/mm/cache.c b/arch/powerpc/mm/cache.c
new file mode 100644
index 0000000..0fbf2d6
--- /dev/null
+++ b/arch/powerpc/mm/cache.c
@@ -0,0 +1,279 @@
+#include <linux/kprobes.h>
+#include <linux/export.h>
+#include <linux/types.h>
+
+#include <asm/cputable.h>
+#include <asm/system.h>
+#include <asm/cache.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+
+/*
+ * Write any modified data cache blocks out to memory.
+ * Does not invalidate the corresponding cache lines (especially for
+ * any corresponding instruction cache).
+ */
+void clean_dcache_range(unsigned long start, unsigned long stop)
+{
+       unsigned long addr;
+       FOR_EACH_CACHELINE(addr, start, stop, dcache)
+               dcbst(addr);
+       mb();
+}
+
+/*
+ * Write any modified data cache blocks out to memory and invalidate them.
+ * Does not invalidate the corresponding instruction cache blocks.
+ */
+void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+       unsigned long addr;
+       FOR_EACH_CACHELINE(addr, start, stop, dcache)
+               dcbf(addr);
+       mb();
+}
+EXPORT_SYMBOL(flush_dcache_range);
+
+/*
+ * Like above, but invalidate the D-cache.  This is used by the 8xx
+ * to invalidate the cache so the PPC core doesn't get stale data
+ * from the CPM (no cache snooping here :-).
+ *
+ * invalidate_dcache_range(unsigned long start, unsigned long stop)
+ */
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
+{
+       unsigned long addr;
+       FOR_EACH_CACHELINE(addr, start, stop, dcache)
+               dcbi(addr);
+       mb();
+}
+
+/*
+ * Write any modified data cache blocks out to memory and invalidate the
+ * corresponding instruction cache blocks for the given address range.
+ * This is a no-op on CPUs with a coherent icache (e.g. the 601).
+ *
+ * On 44x we cannot invalidate individual icache blocks because we are
+ * passed kmapped addresses and the icache is virtually tagged; instead we
+ * flash-invalidate the whole icache with "iccci" (its operands are ignored).
+ */
+__kprobes void __flush_icache_range(unsigned long start, unsigned long stop)
+{
+       unsigned long addr;
+
+       if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+               return;
+
+       /* First ensure that data has been written to memory */
+       FOR_EACH_CACHELINE(addr, start, stop, dcache)
+               dcbst(addr);
+       mb();
+
+#ifdef CONFIG_44x
+       if (mmu_has_feature(MMU_FTR_TYPE_44x)) {
+               asm volatile("iccci 0, 0" ::: "memory");
+               return;
+       }
+#endif
+
+       /* Now discard the corresponding icache */
+       FOR_EACH_CACHELINE(addr, start, stop, icache)
+               icbi(addr);
+       mb();
+       isync();
+}
+EXPORT_SYMBOL(__flush_icache_range);
+
+/*
+ * Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ * This is a no-op on the 601 which has a unified cache.
+ *
+ *     void __flush_dcache_icache(void *page)
+ */
+void __flush_dcache_icache(void *page)
+{
+       unsigned long base = ((unsigned long)page) & ~(PAGE_SIZE-1);
+       unsigned long addr;
+
+       if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+               return;
+
+       /* First ensure that data has been written to memory */
+       FOR_EACH_CACHELINE(addr, base, base + PAGE_SIZE, dcache)
+               dcbst(addr);
+       mb();
+#ifdef CONFIG_44x
+       /*
+        * We don't flush the icache on 44x. Those have a virtual icache and
+        * we don't have access to the virtual address here (it's not the
+        * page vaddr but where it's mapped in user space). The flushing of
+        * the icache on these is handled elsewhere, when a change in the
+        * address space occurs, before returning to user space.
+        */
+       if (mmu_has_feature(MMU_FTR_TYPE_44x))
+               return;
+#endif
+
+       FOR_EACH_CACHELINE(addr, base, base + PAGE_SIZE, icache)
+               icbi(addr);
+
+       mb();
+       isync();
+}
+
+/*
+ * Clear pages using the dcbz instruction, which doesn't cause any
+ * memory traffic (except to write out any cache lines which get
+ * displaced).  This only works on cacheable memory.
+ * The "order" argument is the log2 of the number of pages to clear.
+ */
+void clear_pages(void *page, int order)
+{
+       unsigned long addr, base = (unsigned long)page;
+       FOR_EACH_CACHELINE(addr, base, base + (PAGE_SIZE << order), dcache)
+               dcbz(addr);
+}
+EXPORT_SYMBOL(clear_pages);
+
+#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE)
+/*
+ * Flush a particular page from the data cache to RAM, identified
+ * by its physical address.  We turn off the MMU so we can just use
+ * the physical address (this may be a highmem page without a kernel
+ * mapping).
+ */
+void __flush_dcache_icache_phys(unsigned long phys_page)
+{
+       u32 d_size      = powerpc_caches.dcache_block_bytes;
+       u32 i_size      = powerpc_caches.icache_block_bytes;
+       u32 d_per_page  = powerpc_caches.dcache_blocks_per_page;
+       u32 i_per_page  = powerpc_caches.icache_blocks_per_page;
+
+       /* Temporary registers for the ASM to use */
+       unsigned long old_msr, tmp_msr, d_phys_page, i_phys_page;
+
+       if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+               return;
+
+       /* Page base address (used in 2 different loops) */
+       d_phys_page = i_phys_page = phys_page & ~(PAGE_SIZE - 1);
+
+       /*
+        * This part needs to be 100% ASM because we disable the MMU, and we
+        * can't accidentally let some C code go poking at memory while the
+        * MMU isn't enabled.
+        *
+        * NOTE: This looks blatantly unsafe with respect to interrupts.
+        *       Hopefully all the callers provide sufficient protection?
+        */
+       asm volatile(
+               /* First disable the MMU */
+               "mfmsr %[old_msr]\n\t"
+               "rlwinm %[tmp_msr], %[old_msr], 0, 28, 26\n\t"
+               "mtmsr %[tmp_msr]\n\t"
+               "isync\n\t"
+
+               /* Clean the data cache */
+               "mtctr %[d_per_page]\n"
+       "0:     dcbst 0, %[d_phys_page]\n\t"
+               "add %[d_phys_page], %[d_phys_page], %[d_size]\n\t"
+               "bdnz 0b\n\t"
+               "sync\n\t"
+
+               /* Invalidate the instruction cache */
+               "mtctr %[i_per_page]\n"
+       "0:     icbi 0, %[i_phys_page]\n\t"
+               "add %[i_phys_page], %[i_phys_page], %[i_size]\n\t"
+               "bdnz 0b\n\t"
+
+               /* Finally, re-enable the MMU */
+               "sync\n\t"
+               "mtmsr %[old_msr]\n\t"
+               "isync\n\t"
+
+               /* Temporary variables and inputs */
+               : [old_msr]    "=&r" (old_msr),
+                 [tmp_msr]    "=&r" (tmp_msr),
+                 [d_phys_page] "=b" (d_phys_page),
+                 [i_phys_page] "=b" (i_phys_page)
+
+               /* Inputs */
+               : [d_size]     "b" (d_size),
+                 [i_size]     "b" (i_size),
+                 [d_per_page] "b" (d_per_page),
+                 [i_per_page] "b" (i_per_page),
+                 "[d_phys_page]"  (d_phys_page),
+                 "[i_phys_page]"  (i_phys_page)
+
+               /* Clobbers */
+               : "memory", "ctr"
+       );
+}
+#endif /* CONFIG_PPC32 && !CONFIG_BOOKE */
+
+#ifdef CONFIG_PPC64
+/*
+ * Data cache flush that works on non-mapped physical addresses.
+ * Use only for non-LPAR setups ! It also assumes real mode
+ * is cacheable. Used for flushing out the DART before using
+ * it as uncacheable memory 
+ */
+void flush_dcache_phys_range(unsigned long start, unsigned long stop)
+{
+       /* System data cache block size */
+       unsigned long bytes = powerpc_caches.dcache_block_bytes;
+       unsigned long shift = powerpc_caches.dcache_block_shift;
+
+       /* Temporary registers for the ASM to use */
+       unsigned long old_msr, tmp_msr;
+
+       /* Compute a start address and number of cachelines */
+       unsigned long phys_addr = start & ~(bytes - 1);
+       unsigned long nr_lines = ((stop - phys_addr) + (bytes - 1)) >> shift;
+
+       /*
+        * This part needs to be 100% ASM because we disable the MMU, and we
+        * can't accidentally let some C code go poking at memory while the
+        * MMU isn't enabled.
+        *
+        * NOTE: This looks blatantly unsafe with respect to interrupts.
+        *       Hopefully all the callers provide sufficient protection?
+        */
+       asm volatile(
+               /* First disable the MMU */
+               "mfmsr %[old_msr]\n\t"
+               "rlwinm %[tmp_msr], %[old_msr], 0, 28, 26\n\t"
+               "mtmsr %[tmp_msr]\n\t"
+               "isync\n\t"
+
+               /* Clean the data cache */
+               "mtctr %[nr_lines]\n"
+       "0:     dcbst 0, %[phys_addr]\n\t"
+               "add %[phys_addr], %[phys_addr], %[bytes]\n\t"
+               "bdnz 0b\n\t"
+               "sync\n\t"
+               "isync\n\t"
+
+               /* Finally, re-enable the MMU */
+               "mtmsr %[old_msr]\n\t"
+               "sync\n\t"
+               "isync\n\t"
+
+               /* Temporary variables and inputs */
+               : [old_msr]  "=&r" (old_msr),
+                 [tmp_msr]  "=&r" (tmp_msr),
+                 [phys_addr] "=b" (phys_addr)
+
+               /* Inputs */
+               : [bytes]    "b" (bytes),
+                 [nr_lines] "b" (nr_lines),
+                 "[phys_addr]"  (phys_addr)
+
+               /* Clobbers */
+               : "memory", "ctr"
+       );
+}
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 329be36..3823f64 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -328,7 +328,7 @@ void __dma_sync(void *vaddr, size_t size, int direction)
                 * invalidate only when cache-line aligned otherwise there is
                 * the potential for discarding uncommitted data from the cache
                 */
-               if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1)))
+               if ((start | size) & (powerpc_caches.dcache_block_bytes - 1))
                        flush_dcache_range(start, end);
                else
                        invalidate_dcache_range(start, end);
diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
index 08ab6fe..ac285d9 100644
--- a/arch/powerpc/platforms/52xx/lite5200_sleep.S
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -394,11 +394,16 @@ restore_regs:
 
 
 /* cache flushing code. copied from arch/ppc/boot/util.S */
-#define NUM_CACHE_LINES (128*8)
+#define NUM_CACHE_LINES ((128 * 8) << (L1_CACHE_SHIFT_MAX - L1_CACHE_SHIFT_MIN))
 
 /*
  * Flush data cache
  * Do this by just reading lots of stuff into the cache.
+ *
+ * NOTE: This does not handle variable-sized cachelines properly, but since
+ *       we are just trying to flush the data cache by reading lots of data,
+ *       this works anyways.  We just make sure we read as many cachelines
+ *       as we could possibly need to overflow the cache on any hardware.
  */
 flush_data_cache:
        lis     r3,CONFIG_KERNEL_START@h
@@ -407,6 +412,6 @@ flush_data_cache:
        mtctr   r4
 1:
        lwz     r4,0(r3)
-       addi    r3,r3,L1_CACHE_BYTES    /* Next line, please */
+       addi    r3,r3,L1_CACHE_BYTES_MIN /* Next line, please */
        bdnz    1b
        blr
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 31a7d3a..8503e38 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -1135,7 +1135,7 @@ int pmac_pci_enable_device_hook(struct pci_dev *dev)
                pci_write_config_byte(dev, PCI_LATENCY_TIMER, 16);
 
                pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
-                                     L1_CACHE_BYTES >> 2);
+                               powerpc_caches.dcache_block_bytes >> 2);
        }
 
        return 0;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 03a217a..c537d49 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -26,6 +26,7 @@
 
 #include <asm/ptrace.h>
 #include <asm/string.h>
+#include <asm/cache.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/xmon.h>
@@ -254,16 +255,6 @@ static inline void store_inst(void *p)
        asm volatile ("dcbst 0,%0; sync; icbi 0,%0; isync" : : "r" (p));
 }
 
-static inline void cflush(void *p)
-{
-       asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p));
-}
-
-static inline void cinval(void *p)
-{
-       asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p));
-}
-
 /*
  * Disable surveillance (the service processor watchdog function)
  * while we are in xmon.
@@ -1513,10 +1504,9 @@ static void prregs(struct pt_regs *fp)
 
 static void cacheflush(void)
 {
-       int cmd;
-       unsigned long nflush;
+       unsigned long nflush, i;
 
-       cmd = inchar();
+       int cmd = inchar();
        if (cmd != 'i')
                termch = cmd;
        scanhex((void *)&adrs);
@@ -1524,23 +1514,31 @@
                termch = 0;
        nflush = 1;
        scanhex(&nflush);
-       nflush = (nflush + L1_CACHE_BYTES - 1) / L1_CACHE_BYTES;
-       if (setjmp(bus_error_jmp) == 0) {
-               catch_memory_errors = 1;
-               sync();
 
-               if (cmd != 'i') {
-                       for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES)
-                               cflush((void *) adrs);
-               } else {
-                       for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES)
-                               cinval((void *) adrs);
-               }
-               sync();
-               /* wait a little while to see if we get a machine check */
-               __delay(200);
+       if (setjmp(bus_error_jmp) != 0) {
+               catch_memory_errors = 0;
+               return;
        }
-       catch_memory_errors = 0;
+       catch_memory_errors = 1;
+       sync();
+
+       /* First flush/invalidate data caches */
+       if (cmd != 'i') {
+               FOR_EACH_CACHELINE(i, adrs, adrs + nflush, dcache)
+                       dcbf(i);
+       } else {
+               FOR_EACH_CACHELINE(i, adrs, adrs + nflush, dcache)
+                       dcbi(i);
+       }
+
+       /* Now invalidate instruction caches */
+       FOR_EACH_CACHELINE(i, adrs, adrs + nflush, icache)
+               icbi(i);
+
+       sync();
+       /* wait a little while to see if we get a machine check */
+       __delay(200);
+       catch_memory_errors = 0;
 }
 
 static unsigned long
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 116a49c..04ead15 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -136,7 +136,9 @@ static void smu_start_cmd(void)
        /* Flush command and data to RAM */
        faddr = (unsigned long)smu->cmd_buf;
        fend = faddr + smu->cmd_buf->length + 2;
-       flush_inval_dcache_range(faddr, fend);
+       flush_dcache_range(faddr, fend);
+       mb();
+       isync();
 
 
        /* We also disable NAP mode for the duration of the command
@@ -198,7 +200,9 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
                 * reply length (it's only 2 cache lines anyway)
                 */
                faddr = (unsigned long)smu->cmd_buf;
-               flush_inval_dcache_range(faddr, faddr + 256);
+               flush_dcache_range(faddr, faddr + 256);
+               mb();
+               isync();
 
                /* Now check ack */
                ack = (~cmd->cmd) & 0xff;
-- 
1.7.2.5
