This patch adds a 64-bit by 64-bit unsigned divider that supports ARMv4 and above.

Because the clz (count leading zeros) instruction is not available before
ARMv5, the divider implements its own clz function for ARMv4 targets.

The divider was tested with the following test driver, run under
qemu-arm:

  int main(void)
  {
    uint64_t a, b, q, r;
    while (scanf("%llx %llx %llx %llx", &a, &b, &q, &r) > 0)
      printf("%016llx %016llx %016llx %016llx\n", a, b, a / b, a % b);
    return 0;
  }

Signed-off-by: Che-Liang Chiou <clch...@chromium.org>
Cc: Albert Aribaud <albert.u.b...@aribaud.net>
---
This patch was also tested with `MAKEALL -a arm`.

 arch/arm/lib/Makefile    |    1 +
 arch/arm/lib/_uldivmod.S |  266 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 267 insertions(+), 0 deletions(-)
 create mode 100644 arch/arm/lib/_uldivmod.S

diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 300c8fa..31770dd 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -33,6 +33,7 @@ GLSOBJS       += _divsi3.o
 GLSOBJS        += _lshrdi3.o
 GLSOBJS        += _modsi3.o
 GLSOBJS        += _udivsi3.o
+GLSOBJS        += _uldivmod.o
 GLSOBJS        += _umodsi3.o
 
 GLCOBJS        += div0.o
diff --git a/arch/arm/lib/_uldivmod.S b/arch/arm/lib/_uldivmod.S
new file mode 100644
index 0000000..9e3a5e6
--- /dev/null
+++ b/arch/arm/lib/_uldivmod.S
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2011 The Chromium OS Authors.
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+/*
+ * A, Q = r0 + (r1 << 32)
+ * B, R = r2 + (r3 << 32)
+ * A / B = Q ... R
+ */
+
+       .text
+       .global __aeabi_uldivmod
+       .type   __aeabi_uldivmod, function
+       .align  0
+
+/* ARMv4 has no clz (count leading zeros) instruction: there CLZ/CLZEQ call the L_clz_<dst>_<src> wrapper for that operand pair, so one must exist for every pair used. */
+#if __LINUX_ARM_ARCH__ <= 4
+#  define CLZ(dst, src)                bl      L_clz_ ## dst ## _ ## src
+#  define CLZEQ(dst, src)      bleq    L_clz_ ## dst ## _ ## src
+#else
+#  define CLZ(dst, src)                clz     dst, src
+#  define CLZEQ(dst, src)      clzeq   dst, src
+#endif
+
+A_0    .req    r0      @ A (dividend), low word
+A_1    .req    r1      @ A (dividend), high word
+B_0    .req    r2      @ B (divisor), low word
+B_1    .req    r3      @ B (divisor), high word
+C_0    .req    r4      @ C (scratch; current quotient bit in the divide loop), low word
+C_1    .req    r5      @ C, high word
+D_0    .req    r6      @ D (scratch; quotient accumulator in the divide loop), low word
+D_1    .req    r7      @ D, high word
+
+Q_0    .req    r0      @ Q (quotient), low word; same register as A_0
+Q_1    .req    r1      @ Q (quotient), high word; same register as A_1
+R_0    .req    r2      @ R (remainder), low word; same register as B_0
+R_1    .req    r3      @ R (remainder), high word; same register as B_1
+
+__aeabi_uldivmod:                      @ in: A = r1:r0, B = r3:r2; out: Q = A / B in r1:r0, R = A % B in r3:r2
+       stmfd   sp!, {r4, r5, r6, r7, r8, lr}   @ r8 is unused: it pads the frame so sp stays 8-byte aligned for the calls below
+       @ Test if B == 0
+       orrs    ip, B_0, B_1            @ Z set -> B == 0
+       beq     L_div_by_0
+       @ Test if B is power of 2: (B & (B - 1)) == 0
+       subs    C_0, B_0, #1            @ C = B - 1, borrow carried into the high word
+       sbc     C_1, B_1, #0
+       tst     C_0, B_0
+       tsteq   B_1, C_1
+       beq     L_pow2
+       @ Test if A_1 == B_1 == 0, i.e. both operands fit in 32 bits
+       orrs    ip, A_1, B_1
+       beq     L_div_32_32
+
+L_div_64_64:
+       mov     C_0, #1
+       mov     C_1, #0
+       @ D_0 = clz A (flags are tested again after each CLZEQ: its ARMv4 expansion is a call that changes them)
+       CLZ(D_0, A_1)
+       teq     A_1, #0
+       CLZEQ(ip, A_0)
+       teq     A_1, #0
+       addeq   D_0, D_0, ip
+       @ D_1 = clz B
+       CLZ(D_1, B_1)
+       teq     B_1, #0
+       CLZEQ(ip, B_0)
+       teq     B_1, #0
+       addeq   D_1, D_1, ip
+       @ if clz B - clz A <= 0: goto L_done_shift
+       subs    D_0, D_1, D_0           @ D_0 = shift that aligns the top bit of B with that of A
+       bls     L_done_shift
+       subs    D_1, D_0, #32           @ N set -> shift < 32
+       rsb     ip, D_0, #32
+       @ B <<= (clz B - clz A)
+       movmi   B_1, B_1, lsl D_0
+       orrmi   B_1, B_1, B_0, lsr ip
+       movpl   B_1, B_0, lsl D_1
+       mov     B_0, B_0, lsl D_0
+       @ C = 1 << (clz B - clz A)
+       movmi   C_1, C_1, lsl D_0
+       orrmi   C_1, C_1, C_0, lsr ip
+       movpl   C_1, C_0, lsl D_1
+       mov     C_0, C_0, lsl D_0
+L_done_shift:
+       mov     D_0, #0
+       mov     D_1, #0
+       @ C: current bit; D: result
+L_subtract:
+       @ if A >= B
+       cmp     A_1, B_1
+       cmpeq   A_0, B_0
+       bcc     L_update
+       @ A -= B
+       subs    A_0, A_0, B_0
+       sbc     A_1, A_1, B_1
+       @ D |= C
+       orr     D_0, D_0, C_0
+       orr     D_1, D_1, C_1
+L_update:
+       @ if A == 0: break
+       orrs    ip, A_1, A_0
+       beq     L_exit
+       @ C >>= 1
+       movs    C_1, C_1, lsr #1
+       movs    C_0, C_0, rrx           @ rrx shifts in the bit dropped from the high word
+       @ if C == 0: break
+       orrs    ip, C_1, C_0
+       beq     L_exit
+       @ B >>= 1
+       movs    B_1, B_1, lsr #1
+       mov     B_0, B_0, rrx
+       b       L_subtract
+L_exit:
+       @ Note: A, Q and B, R are register aliases, so R is copied from A before Q overwrites A
+       mov     R_0, A_0
+       mov     R_1, A_1
+       mov     Q_0, D_0
+       mov     Q_1, D_1
+       ldmfd   sp!, {r4, r5, r6, r7, r8, pc}
+
+L_div_32_32:
+       @ Note: A_0 and r0 are aliases, as are Q_1 and r1,
+       @       so only the divisor has to be moved (into r1) for the 32-bit divide
+       mov     r1, B_0
+       bl      __aeabi_uidivmod        @ per the ARM run-time ABI: quotient in r0, remainder in r1
+       mov     R_0, r1
+       mov     R_1, #0
+       mov     Q_1, #0
+       ldmfd   sp!, {r4, r5, r6, r7, r8, pc}
+
+L_pow2:
+       @ Note: A, Q and B, R are aliases; C still holds B - 1 from the power-of-2 test
+       @ R = A & (B - 1)
+       and     C_0, A_0, C_0
+       and     C_1, A_1, C_1
+       @ Q = A >> log2(B)
+       @ Note: B must not be 0 here!
+       CLZ(D_0, B_0)
+       add     D_1, D_0, #1            @ D_1 = 32 - log2(B_0)
+       rsbs    D_0, D_0, #31           @ D_0 = log2(B_0); negative -> B_0 == 0, the set bit is in B_1
+       movpl   A_0, A_0, lsr D_0
+       orrpl   A_0, A_0, A_1, lsl D_1
+       bpl     L_1
+       CLZ(D_0, B_1)
+       rsb     D_0, D_0, #31
+       mov     A_0, A_1, lsr D_0
+       add     D_0, D_0, #32           @ shift count of 32 or more: the lsr at L_1 clears A_1
+L_1:
+       mov     A_1, A_1, lsr D_0
+       @ Move C back to R
+       mov     R_0, C_0
+       mov     R_1, C_1
+       ldmfd   sp!, {r4, r5, r6, r7, r8, pc}
+
+L_div_by_0:
+       bl      __div0
+       @ No meaningful result exists: return Q = R = 0 if __div0 returns
+       mov     Q_0, #0
+       mov     Q_1, #0
+       mov     R_0, #0
+       mov     R_1, #0
+       ldmfd   sp!, {r4, r5, r6, r7, r8, pc}
+
+#if __LINUX_ARM_ARCH__ <= 4
+/*
+ * Count leading zeros, one nibble at a time (ARMv4 fallback for clz)
+ *
+ * input       : r0
+ * output      : r0 (32 when the input is 0)
+ * destroy     : r1, r2, r3, r4, r5, flags
+ */
+L_clz:
+       mov     r1, #0          @ clz result
+       mov     r2, #0xf0000000 @ mask
+       mov     r3, #28         @ shift amount
+       adr     r4, L_clz_table
+L_clz_loop:
+       teq     r2, #0          @ mask shifted out: all eight nibbles were zero
+       beq     L_clz_loop_done
+       ands    r5, r0, r2      @ Z set -> this nibble is zero
+       mov     r5, r5, lsr r3
+       ldrsb   r5, [r4, r5]    @ leading zeros within this nibble
+       add     r1, r1, r5
+       mov     r2, r2, lsr #4
+       sub     r3, r3, #4
+       beq     L_clz_loop      @ Z is still from ands: keep going only past a zero nibble
+L_clz_loop_done:
+       mov     r0, r1
+       mov     pc, lr
+L_clz_table:                   @ leading-zero count of each 4-bit value, indexed by that value
+       .byte   4
+       .byte   3
+       .byte   2
+       .byte   2
+       .byte   1
+       .byte   1
+       .byte   1
+       .byte   1
+       .byte   0
+       .byte   0
+       .byte   0
+       .byte   0
+       .byte   0
+       .byte   0
+       .byte   0
+       .byte   0
+
+L_clz_D_0_A_1:                 @ D_0 = clz(A_1); r0-r5 are restored on return
+       stmfd   sp!, {r0-r5, lr}
+       mov     r0, A_1         @ L_clz takes its argument in r0
+       bl      L_clz
+       mov     D_0, r0         @ keep the count before r0 is restored
+       ldmfd   sp!, {r0-r5, pc}
+
+L_clz_ip_A_0:                  @ ip = clz(A_0); r0-r5 are restored on return
+       stmfd   sp!, {r0-r5, lr}
+       mov     r0, A_0         @ L_clz takes its argument in r0
+       bl      L_clz
+       mov     ip, r0          @ keep the count before r0 is restored
+       ldmfd   sp!, {r0-r5, pc}
+
+L_clz_D_1_B_1:                 @ D_1 = clz(B_1); r0-r5 are restored on return
+       stmfd   sp!, {r0-r5, lr}
+       mov     r0, B_1         @ L_clz takes its argument in r0
+       bl      L_clz
+       mov     D_1, r0         @ keep the count before r0 is restored
+       ldmfd   sp!, {r0-r5, pc}
+
+L_clz_ip_B_0:                  @ ip = clz(B_0); r0-r5 are restored on return
+       stmfd   sp!, {r0-r5, lr}
+       mov     r0, B_0         @ L_clz takes its argument in r0
+       bl      L_clz
+       mov     ip, r0          @ keep the count before r0 is restored
+       ldmfd   sp!, {r0-r5, pc}
+
+L_clz_D_0_B_0:                 @ D_0 = clz(B_0); r0-r5 are restored on return
+       stmfd   sp!, {r0-r5, lr}
+       mov     r0, B_0         @ L_clz takes its argument in r0
+       bl      L_clz
+       mov     D_0, r0         @ keep the count before r0 is restored
+       ldmfd   sp!, {r0-r5, pc}
+
+L_clz_D_0_B_1:                 @ D_0 = clz(B_1); r0-r5 are restored on return
+       stmfd   sp!, {r0-r5, lr}
+       mov     r0, B_1         @ L_clz takes its argument in r0
+       bl      L_clz
+       mov     D_0, r0         @ keep the count before r0 is restored
+       ldmfd   sp!, {r0-r5, pc}
+#endif /* __LINUX_ARM_ARCH__  */
-- 
1.7.3.1

_______________________________________________
U-Boot mailing list
U-Boot@lists.denx.de
http://lists.denx.de/mailman/listinfo/u-boot

Reply via email to