This patch adds a 64-bit by 64-bit unsigned divider (__aeabi_uldivmod) that supports ARMv4 and above. Because the clz (count leading zeros) instruction was not added until ARMv5, the divider implements its own clz function for ARMv4 targets.
The divider was tested with the following test driver code run under qemu-arm: int main(void) { uint64_t a, b, q, r; while (scanf("%llx %llx %llx %llx", &a, &b, &q, &r) > 0) printf("%016llx %016llx %016llx %016llx\n", a, b, a / b, a % b); return 0; } Signed-off-by: Che-Liang Chiou <clch...@chromium.org> Cc: Albert Aribaud <albert.u.b...@aribaud.net> --- This patch is also tested with `MAKEALL -a arm` arch/arm/lib/Makefile | 1 + arch/arm/lib/_uldivmod.S | 266 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 267 insertions(+), 0 deletions(-) create mode 100644 arch/arm/lib/_uldivmod.S diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 300c8fa..31770dd 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -33,6 +33,7 @@ GLSOBJS += _divsi3.o GLSOBJS += _lshrdi3.o GLSOBJS += _modsi3.o GLSOBJS += _udivsi3.o +GLSOBJS += _uldivmod.o GLSOBJS += _umodsi3.o GLCOBJS += div0.o diff --git a/arch/arm/lib/_uldivmod.S b/arch/arm/lib/_uldivmod.S new file mode 100644 index 0000000..9e3a5e6 --- /dev/null +++ b/arch/arm/lib/_uldivmod.S @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2011 The Chromium OS Authors. + * See file CREDITS for list of people who contributed to this + * project. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+/*
+ * In : dividend A = r0 + (r1 << 32), divisor B = r2 + (r3 << 32)
+ * Out: quotient Q = r0 + (r1 << 32), remainder R = r2 + (r3 << 32)
+ * A / B = Q ... R (unsigned); B == 0 calls __div0 and yields Q = R = 0
+ */
+
+	.text
+	.global	__aeabi_uldivmod
+	.type	__aeabi_uldivmod, function
+	.align	0
+
+/* ARMv4 has no clz instruction: CLZ/CLZEQ call an L_clz_<dst>_<src> helper. */
+#if __LINUX_ARM_ARCH__ <= 4
+# define CLZ(dst, src)		bl	L_clz_ ## dst ## _ ## src
+# define CLZEQ(dst, src)	bleq	L_clz_ ## dst ## _ ## src
+#else
+# define CLZ(dst, src)		clz	dst, src
+# define CLZEQ(dst, src)	clzeq	dst, src
+#endif
+
+A_0	.req	r0			@ dividend, low word
+A_1	.req	r1			@ dividend, high word
+B_0	.req	r2			@ divisor, low word
+B_1	.req	r3			@ divisor, high word
+C_0	.req	r4			@ scratch: B - 1, then current quotient bit
+C_1	.req	r5
+D_0	.req	r6			@ scratch: clz values, then quotient accumulator
+D_1	.req	r7
+
+Q_0	.req	r0			@ quotient, low word (same register as A_0)
+Q_1	.req	r1			@ quotient, high word (same register as A_1)
+R_0	.req	r2			@ remainder, low word (same register as B_0)
+R_1	.req	r3			@ remainder, high word (same register as B_1)
+
+__aeabi_uldivmod:
+	stmfd	sp!, {r4, r5, r6, r7, lr}	@ save the C/D working registers and lr
+	@ Test if B == 0
+	orrs	ip, B_0, B_1		@ Z set -> B == 0
+	beq	L_div_by_0
+	@ Test if B is power of 2: (B & (B - 1)) == 0
+	subs	C_0, B_0, #1		@ C = B - 1 (64-bit, borrow via carry)
+	sbc	C_1, B_1, #0
+	tst	C_0, B_0
+	tsteq	B_1, C_1
+	beq	L_pow2			@ L_pow2 reuses C = B - 1 as the remainder mask
+	@ Test if A_1 == B_1 == 0
+	orrs	ip, A_1, B_1
+	beq	L_div_32_32		@ both operands fit in 32 bits
+
+L_div_64_64:
+	mov	C_0, #1			@ C = 1, shifted below into the top quotient bit
+	mov	C_1, #0
+	@ D_0 = clz A (64-bit: clz A_1, plus clz A_0 when A_1 == 0)
+	CLZ(D_0, A_1)
+	teq	A_1, #0
+	CLZEQ(ip, A_0)
+	teq	A_1, #0			@ re-test: the ARMv4 clz helper changes the flags
+	addeq	D_0, D_0, ip
+	@ D_1 = clz B (64-bit: clz B_1, plus clz B_0 when B_1 == 0)
+	CLZ(D_1, B_1)
+	teq	B_1, #0
+	CLZEQ(ip, B_0)
+	teq	B_1, #0			@ re-test: the ARMv4 clz helper changes the flags
+	addeq	D_1, D_1, ip
+	@ if clz B - clz A <= 0: goto L_done_shift
+	subs	D_0, D_1, D_0		@ D_0 = shift = clz B - clz A
+	bls	L_done_shift
+	subs	D_1, D_0, #32		@ D_1 = shift - 32; N set -> shift < 32
+	rsb	ip, D_0, #32		@ ip = 32 - shift
+	@ B <<= (clz B - clz A)
+	movmi	B_1, B_1, lsl D_0	@ shift < 32: high word takes bits from both words
+	orrmi	B_1, B_1, B_0, lsr ip
+	movpl	B_1, B_0, lsl D_1	@ shift >= 32: high word comes from the low word only
+	mov	B_0, B_0, lsl D_0
+	@ C = 1 << (clz B - clz A)
+	movmi	C_1, C_1, lsl D_0
+	orrmi	C_1, C_1, C_0, lsr ip
+	movpl	C_1, C_0, lsl D_1
+	mov	C_0, C_0, lsl D_0
+L_done_shift:
+	mov	D_0, #0
+	mov	D_1, #0
+	@ C: current bit; D: result
+L_subtract:
+	@ if A >= B
+	cmp	A_1, B_1
+	cmpeq	A_0, B_0
+	bcc	L_update		@ carry clear -> A < B (unsigned): skip this bit
+	@ A -= B
+	subs	A_0, A_0, B_0
+	sbc	A_1, A_1, B_1
+	@ D |= C
+	orr	D_0, D_0, C_0
+	orr	D_1, D_1, C_1
+L_update:
+	@ if A == 0: break
+	orrs	ip, A_1, A_0
+	beq	L_exit
+	@ C >>= 1
+	movs	C_1, C_1, lsr #1	@ bit shifted out lands in carry ...
+	movs	C_0, C_0, rrx		@ ... and rrx moves it into bit 31 of the low word
+	@ if C == 0: break
+	orrs	ip, C_1, C_0
+	beq	L_exit
+	@ B >>= 1
+	movs	B_1, B_1, lsr #1
+	mov	B_0, B_0, rrx
+	b	L_subtract
+L_exit:
+	@ Note: A, B & Q, R are aliases; what is left of A is the remainder
+	mov	R_0, A_0
+	mov	R_1, A_1
+	mov	Q_0, D_0
+	mov	Q_1, D_1
+	ldmfd	sp!, {r4, r5, r6, r7, pc}
+
+L_div_32_32:
+	@ Note: A_0 is r0, so the 32-bit dividend is already in place;
+	@ r1 (A_1, known to be 0 here) receives the 32-bit divisor
+	mov	r1, B_0
+	bl	__aeabi_uidivmod	@ defined elsewhere; presumably r0 = quotient, r1 = remainder -- confirm
+	mov	R_0, r1
+	mov	R_1, #0
+	mov	Q_1, #0
+	ldmfd	sp!, {r4, r5, r6, r7, pc}
+
+L_pow2:
+	@ Note: A, B and Q, R are aliases
+	@ R = A & (B - 1); C still holds B - 1 from the power-of-2 test
+	and	C_0, A_0, C_0
+	and	C_1, A_1, C_1
+	@ Q = A >> log2(B)
+	@ Note: B must not be 0 here!
+	CLZ(D_0, B_0)
+	add	D_1, D_0, #1		@ D_1 = 32 - log2(B_0) when B_0 != 0
+	rsbs	D_0, D_0, #31		@ D_0 = log2(B_0); negative when B_0 == 0
+	movpl	A_0, A_0, lsr D_0
+	orrpl	A_0, A_0, A_1, lsl D_1
+	bpl	L_1
+	CLZ(D_0, B_1)			@ B_0 == 0: the single set bit is in B_1
+	rsb	D_0, D_0, #31		@ D_0 = log2(B_1)
+	mov	A_0, A_1, lsr D_0
+	add	D_0, D_0, #32		@ shift of 32 or more makes the lsr below yield 0
+L_1:
+	mov	A_1, A_1, lsr D_0
+	@ Mov back C to R
+	mov	R_0, C_0
+	mov	R_1, C_1
+	ldmfd	sp!, {r4, r5, r6, r7, pc}
+
+L_div_by_0:
+	bl	__div0
+	@ If __div0 returns, report Q = R = 0: as wrong as it could be
+	mov	Q_0, #0
+	mov	Q_1, #0
+	mov	R_0, #0
+	mov	R_1, #0
+	ldmfd	sp!, {r4, r5, r6, r7, pc}
+
+#if __LINUX_ARM_ARCH__ <= 4
+/*
+ * count leading zeros of a 32-bit word, one nibble at a time (32 for 0)
+ *
+ * input : r0
+ * output : r0
+ * destroy : r1, r2, r3, r4, r5
+ */
+L_clz:
+	mov	r1, #0		// clz result
+	mov	r2, #0xf0000000	// mask
+	mov	r3, #28		// shift amount
+	adr	r4, L_clz_table
+L_clz_loop:
+	teq	r2, #0			@ mask shifted out: all eight nibbles were zero
+	beq	L_clz_loop_done
+	ands	r5, r0, r2		@ Z set -> this nibble is zero
+	mov	r5, r5, lsr r3		@ r5 = nibble value, used as table index
+	ldrsb	r5, [r4, r5]
+	add	r1, r1, r5
+	mov	r2, r2, lsr #4
+	add	r3, r3, #-4
+	beq	L_clz_loop		@ flags still from ands: continue only after a zero nibble
+L_clz_loop_done:
+	mov	r0, r1
+	mov	pc, lr
+L_clz_table:				@ entry n = number of leading zeros in nibble n
+	.byte	4
+	.byte	3
+	.byte	2
+	.byte	2
+	.byte	1
+	.byte	1
+	.byte	1
+	.byte	1
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+
+L_clz_D_0_A_1:				@ D_0 = clz A_1; r0-r5 are preserved
+	stmfd	sp!, {r0, r1, r2, r3, r4, r5, lr}
+	mov	r0, A_1
+	bl	L_clz
+	mov	D_0, r0
+	ldmfd	sp!, {r0, r1, r2, r3, r4, r5, pc}
+
+L_clz_ip_A_0:				@ ip = clz A_0; r0-r5 are preserved
+	stmfd	sp!, {r0, r1, r2, r3, r4, r5, lr}
+	mov	r0, A_0
+	bl	L_clz
+	mov	ip, r0
+	ldmfd	sp!, {r0, r1, r2, r3, r4, r5, pc}
+
+L_clz_D_1_B_1:				@ D_1 = clz B_1; r0-r5 are preserved
+	stmfd	sp!, {r0, r1, r2, r3, r4, r5, lr}
+	mov	r0, B_1
+	bl	L_clz
+	mov	D_1, r0
+	ldmfd	sp!, {r0, r1, r2, r3, r4, r5, pc}
+
+L_clz_ip_B_0:				@ ip = clz B_0; r0-r5 are preserved
+	stmfd	sp!, {r0, r1, r2, r3, r4, r5, lr}
+	mov	r0, B_0
+	bl	L_clz
+	mov	ip, r0
+	ldmfd	sp!, {r0, r1, r2, r3, r4, r5, pc}
+
+L_clz_D_0_B_0:				@ D_0 = clz B_0; r0-r5 are preserved
+	stmfd	sp!, {r0, r1, r2, r3, r4, r5, lr}
+	mov	r0, B_0
+	bl	L_clz
+	mov	D_0, r0
+	ldmfd	sp!, {r0, r1, r2, r3, r4, r5, pc}
+
+L_clz_D_0_B_1:				@ D_0 = clz B_1; r0-r5 are preserved
+	stmfd	sp!, {r0, r1, r2, r3, r4, r5, lr}
+	mov	r0, B_1
+	bl	L_clz
+	mov	D_0, r0
+	ldmfd	sp!, {r0, r1, r2, r3, r4, r5, pc}
+#endif /* __LINUX_ARM_ARCH__ */
--
1.7.3.1
_______________________________________________
U-Boot mailing list
U-Boot@lists.denx.de
http://lists.denx.de/mailman/listinfo/u-boot