From: Paul E. McKenney <paul...@linux.vnet.ibm.com>

The powerpc 64-bit __copy_tofrom_user() function uses shifts to handle
unaligned invocations.  However, these shifts were designed for
big-endian systems: On little-endian systems, they must shift in the
opposite direction.

This commit relies on the C preprocessor to insert the correct shifts
into the assembly code.

[ This is a rare but nasty LE issue. Most of the time we use the POWER7
optimised __copy_tofrom_user_power7 loop, but when it hits an exception
we fall back to the base __copy_tofrom_user loop. - Anton ]

Signed-off-by: Paul E. McKenney <paul...@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <an...@samba.org>
---

diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index d73a590..596a285 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -9,6 +9,14 @@
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 
+#ifdef __BIG_ENDIAN__
+#define sLd sld                /* Shift towards low-numbered address. */
+#define sHd srd                /* Shift towards high-numbered address. */
+#else
+#define sLd srd                /* Shift towards low-numbered address. */
+#define sHd sld                /* Shift towards high-numbered address. */
+#endif
+
        .align  7
 _GLOBAL(__copy_tofrom_user)
 BEGIN_FTR_SECTION
@@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 
 24:    ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
 25:    ld      r0,8(r4)
-       sld     r6,r9,r10
+       sLd     r6,r9,r10
 26:    ldu     r9,16(r4)
-       srd     r7,r0,r11
-       sld     r8,r0,r10
+       sHd     r7,r0,r11
+       sLd     r8,r0,r10
        or      r7,r7,r6
        blt     cr6,79f
 27:    ld      r0,8(r4)
@@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 
 28:    ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
 29:    ldu     r9,8(r4)
-       sld     r8,r0,r10
+       sLd     r8,r0,r10
        addi    r3,r3,-8
        blt     cr6,5f
 30:    ld      r0,8(r4)
-       srd     r12,r9,r11
-       sld     r6,r9,r10
+       sHd     r12,r9,r11
+       sLd     r6,r9,r10
 31:    ldu     r9,16(r4)
        or      r12,r8,r12
-       srd     r7,r0,r11
-       sld     r8,r0,r10
+       sHd     r7,r0,r11
+       sLd     r8,r0,r10
        addi    r3,r3,16
        beq     cr6,78f
 
 1:     or      r7,r7,r6
 32:    ld      r0,8(r4)
 76:    std     r12,8(r3)
-2:     srd     r12,r9,r11
-       sld     r6,r9,r10
+2:     sHd     r12,r9,r11
+       sLd     r6,r9,r10
 33:    ldu     r9,16(r4)
        or      r12,r8,r12
 77:    stdu    r7,16(r3)
-       srd     r7,r0,r11
-       sld     r8,r0,r10
+       sHd     r7,r0,r11
+       sLd     r8,r0,r10
        bdnz    1b
 
 78:    std     r12,8(r3)
        or      r7,r7,r6
 79:    std     r7,16(r3)
-5:     srd     r12,r9,r11
+5:     sHd     r12,r9,r11
        or      r12,r8,r12
 80:    std     r12,24(r3)
        bne     6f
@@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
        blr
 6:     cmpwi   cr1,r5,8
        addi    r3,r3,32
-       sld     r9,r9,r10
+       sLd     r9,r9,r10
        ble     cr1,7f
 34:    ld      r0,8(r4)
-       srd     r7,r0,r11
+       sHd     r7,r0,r11
        or      r9,r7,r9
 7:
        bf      cr7*4+1,1f
+#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,32
+#endif
 94:    stw     r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+       rotrdi  r9,r9,32
+#endif
        addi    r3,r3,4
 1:     bf      cr7*4+2,2f
+#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,16
+#endif
 95:    sth     r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+       rotrdi  r9,r9,16
+#endif
        addi    r3,r3,2
 2:     bf      cr7*4+3,3f
+#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,8
+#endif
 96:    stb     r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+       rotrdi  r9,r9,8
+#endif
 3:     li      r3,0
        blr
 
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to