From: Ian Munsie <imun...@au1.ibm.com>

The 32-bit PowerPC ABI states that when arguments and return values are
passed in registers, a value of type long long is stored in a pair of
registers as follows:

The lower addressed word is stored in the next available odd numbered
register and the higher addressed word is stored in register+1.

i.e. the two words will be stored in the first available of the following
pairs: r3/r4, r5/r6, r7/r8 or r9/r10
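For example (an illustrative sketch; the function f and the values are made
up), a call to long long f(unsigned int a, long long b) passes a in r3 and
b in the r5/r6 pair, skipping r4 so the pair starts on an odd register. On
big endian, the caller would pass b = 0x0000000100000002 as:

	li	r3,7		# a
	li	r5,1		# lower addressed (most significant) word of b
	li	r6,2		# higher addressed (least significant) word of b
	bl	f

while on little endian the same value would go in as r5=2, r6=1, since
there the lower addressed word is the least significant one.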

Since the lower addressed word must go in the lower numbered register of
the pair, which register holds the most significant word depends on the
endianness, and any assembly that is passed or returns a 64-bit value needs
to take this into account.

This patch introduces some aliases in ppc_asm.h which select the
appropriate register from the pair depending on the CPU endianness. They
are of the form r34l for the low word of the r3/r4 pair and r34h for the
high word, and likewise for the r5/r6 and r7/r8 pairs.
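So, for example, a routine that returns the 64-bit constant 1 (a contrived
sketch, not taken from this patch) can be written once for both
endiannesses:

	li	r34h,0		# most significant word of the returned value
	li	r34l,1		# least significant word of the returned value
	blr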

It also introduces p64l and p64h, which give the byte offset of the low and
high words when accessing a 64-bit value through a pointer one 32-bit word
at a time. For instance, if r3 contains the address of a 64-bit value, the
following assembly loads the high word into r5 and the low word into r6
regardless of endianness:
        lwz     r5,p64h(r3)
        lwz     r6,p64l(r3)
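The same offsets work for stores; e.g. to write the pair r5/r6 (illustrative
registers, high/low words) back through the pointer in r3, much as the
__div64_32 change below does for its quotient:

	stw	r5,p64h(r3)
	stw	r6,p64l(r3)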

Finally, the patch converts the functions in misc_32.S and div64.S that
take or return 64-bit values to use these new accessors so that they also
work on the little endian PowerPC architecture:

mulhdu, __div64_32, __ashrdi3, __ashldi3, __lshrdi3 and __ucmpdi2

Signed-off-by: Ian Munsie <imun...@au1.ibm.com>
---
 arch/powerpc/include/asm/ppc_asm.h |   24 ++++++++++++
 arch/powerpc/kernel/misc_32.S      |   73 +++++++++++++++++++------------------
 arch/powerpc/lib/div64.S           |    8 ++--
 3 files changed, 65 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 9821006..6929483 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -510,6 +510,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
 #define        r30     30
 #define        r31     31
 
+/* Endian agnostic accessors for 64 bit values passed and returned in GPRs */
+#ifdef __BIG_ENDIAN__
+#define r34l   r4
+#define r34h   r3
+#define r56l   r6
+#define r56h   r5
+#define r78l   r8
+#define r78h   r7
+
+/* Endian agnostic accessors for pointer offsets to 64 bit values */
+#define p64l   4
+#define p64h   0
+#else
+#define r34l   r3
+#define r34h   r4
+#define r56l   r5
+#define r56h   r6
+#define r78l   r7
+#define r78h   r8
+
+#define p64l   0
+#define p64h   4
+#endif
+
 
 /* Floating Point Registers (FPRs) */
 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index a7a570d..6c40079 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -60,27 +60,27 @@ _GLOBAL(call_handle_irq)
  * This returns the high 64 bits of the product of two 64-bit numbers.
  */
 _GLOBAL(mulhdu)
-       cmpwi   r6,0
-       cmpwi   cr1,r3,0
-       mr      r10,r4
-       mulhwu  r4,r4,r5
+       cmpwi   r56l,0
+       cmpwi   cr1,r34h,0
+       mr      r10,r34l
+       mulhwu  r34l,r34l,r56h
        beq     1f
-       mulhwu  r0,r10,r6
-       mullw   r7,r10,r5
+       mulhwu  r0,r10,r56l
+       mullw   r7,r10,r56h
        addc    r7,r0,r7
-       addze   r4,r4
+       addze   r34l,r34l
 1:     beqlr   cr1             /* all done if high part of A is 0 */
-       mr      r10,r3
-       mullw   r9,r3,r5
-       mulhwu  r3,r3,r5
+       mr      r10,r34h
+       mullw   r9,r34h,r56h
+       mulhwu  r34h,r34h,r56h
        beq     2f
-       mullw   r0,r10,r6
-       mulhwu  r8,r10,r6
+       mullw   r0,r10,r56l
+       mulhwu  r8,r10,r56l
        addc    r7,r0,r7
-       adde    r4,r4,r8
-       addze   r3,r3
-2:     addc    r4,r4,r9
-       addze   r3,r3
+       adde    r34l,r34l,r8
+       addze   r34h,r34h
+2:     addc    r34l,r34l,r9
+       addze   r34h,r34h
        blr
 
 /*
@@ -606,37 +606,37 @@ _GLOBAL(atomic_set_mask)
  */
 _GLOBAL(__ashrdi3)
        subfic  r6,r5,32
-       srw     r4,r4,r5        # LSW = count > 31 ? 0 : LSW >> count
+       srw     r34l,r34l,r5    # LSW = count > 31 ? 0 : LSW >> count
        addi    r7,r5,32        # could be xori, or addi with -32
-       slw     r6,r3,r6        # t1 = count > 31 ? 0 : MSW << (32-count)
+       slw     r6,r34h,r6      # t1 = count > 31 ? 0 : MSW << (32-count)
        rlwinm  r8,r7,0,32      # t3 = (count < 32) ? 32 : 0
-       sraw    r7,r3,r7        # t2 = MSW >> (count-32)
-       or      r4,r4,r6        # LSW |= t1
+       sraw    r7,r34h,r7      # t2 = MSW >> (count-32)
+       or      r34l,r34l,r6    # LSW |= t1
        slw     r7,r7,r8        # t2 = (count < 32) ? 0 : t2
-       sraw    r3,r3,r5        # MSW = MSW >> count
-       or      r4,r4,r7        # LSW |= t2
+       sraw    r34h,r34h,r5    # MSW = MSW >> count
+       or      r34l,r34l,r7    # LSW |= t2
        blr
 
 _GLOBAL(__ashldi3)
        subfic  r6,r5,32
-       slw     r3,r3,r5        # MSW = count > 31 ? 0 : MSW << count
+       slw     r34h,r34h,r5    # MSW = count > 31 ? 0 : MSW << count
        addi    r7,r5,32        # could be xori, or addi with -32
-       srw     r6,r4,r6        # t1 = count > 31 ? 0 : LSW >> (32-count)
-       slw     r7,r4,r7        # t2 = count < 32 ? 0 : LSW << (count-32)
-       or      r3,r3,r6        # MSW |= t1
-       slw     r4,r4,r5        # LSW = LSW << count
-       or      r3,r3,r7        # MSW |= t2
+       srw     r6,r34l,r6      # t1 = count > 31 ? 0 : LSW >> (32-count)
+       slw     r7,r34l,r7      # t2 = count < 32 ? 0 : LSW << (count-32)
+       or      r34h,r34h,r6    # MSW |= t1
+       slw     r34l,r34l,r5    # LSW = LSW << count
+       or      r34h,r34h,r7    # MSW |= t2
        blr
 
 _GLOBAL(__lshrdi3)
        subfic  r6,r5,32
-       srw     r4,r4,r5        # LSW = count > 31 ? 0 : LSW >> count
+       srw     r34l,r34l,r5    # LSW = count > 31 ? 0 : LSW >> count
        addi    r7,r5,32        # could be xori, or addi with -32
-       slw     r6,r3,r6        # t1 = count > 31 ? 0 : MSW << (32-count)
-       srw     r7,r3,r7        # t2 = count < 32 ? 0 : MSW >> (count-32)
-       or      r4,r4,r6        # LSW |= t1
-       srw     r3,r3,r5        # MSW = MSW >> count
-       or      r4,r4,r7        # LSW |= t2
+       slw     r6,r34h,r6      # t1 = count > 31 ? 0 : MSW << (32-count)
+       srw     r7,r34h,r7      # t2 = count < 32 ? 0 : MSW >> (count-32)
+       or      r34l,r34l,r6    # LSW |= t1
+       srw     r34h,r34h,r5    # MSW = MSW >> count
+       or      r34l,r34l,r7    # LSW |= t2
        blr
 
 /*
@@ -644,10 +644,11 @@ _GLOBAL(__lshrdi3)
  * Returns 0 if a < b, 1 if a == b, 2 if a > b.
  */
 _GLOBAL(__ucmpdi2)
-	cmplw	r3,r5
+	cmplw	r34h,r56h
+	mr	r9,r34l		# preserve LSW of a; r34l is r3 on LE and is clobbered below
 	li	r3,1
 	bne	1f
-	cmplw	r4,r6
+	cmplw	r9,r56l
        beqlr
 1:     li      r3,0
        bltlr
diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S
index 83d9832..12f2da4 100644
--- a/arch/powerpc/lib/div64.S
+++ b/arch/powerpc/lib/div64.S
@@ -17,8 +17,8 @@
 #include <asm/processor.h>
 
 _GLOBAL(__div64_32)
-       lwz     r5,0(r3)        # get the dividend into r5/r6
-       lwz     r6,4(r3)
+       lwz     r5,p64h(r3)     # get the dividend into r5/r6
+       lwz     r6,p64l(r3)
        cmplw   r5,r4
        li      r7,0
        li      r8,0
@@ -53,7 +53,7 @@ _GLOBAL(__div64_32)
        mullw   r10,r0,r4       # and get the remainder
        add     r8,r8,r0
        subf    r6,r10,r6
-4:     stw     r7,0(r3)        # return the quotient in *r3
-       stw     r8,4(r3)
+4:     stw     r7,p64h(r3)     # return the quotient in *r3
+       stw     r8,p64l(r3)
        mr      r3,r6           # return the remainder in r3
        blr
-- 
1.7.1
