Hi, This patch ports the aeabi_idiv routine from Linaro Cortex-Strings (https://git.linaro.org/toolchain/cortex-strings.git), which was contributed by ARM under Free BSD license.
The new aeabi_idiv routine is used to replace the one in libgcc/config/arm/lib1funcs.S. This replacement happens within the Thumb1 wrapper. The new routine is under LGPLv3 license. The main advantage of this version is that it can improve the performance of the aeabi_idiv function for Thumb1. This solution will also increase the code size. So it will only be used if __OPTIMIZE_SIZE__ is not defined. Make check passed for armv6-m. OK for trunk? Thanks, Hale Wang libgcc/ChangeLog: 2014-11-26 Hale Wang <hale.w...@arm.com> * config/arm/lib1funcs.S: Add new wrapper. =============================================== diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S index b617137..de66c81 100644 --- a/libgcc/config/arm/lib1funcs.S +++ b/libgcc/config/arm/lib1funcs.S @@ -306,34 +306,12 @@ LSYM(Lend_fde): #ifdef __ARM_EABI__ .macro THUMB_LDIV0 name signed #if defined(__ARM_ARCH_6M__) - .ifc \signed, unsigned - cmp r0, #0 - beq 1f - mov r0, #0 - mvn r0, r0 @ 0xffffffff -1: - .else - cmp r0, #0 - beq 2f - blt 3f + + push {r0, lr} mov r0, #0 - mvn r0, r0 - lsr r0, r0, #1 @ 0x7fffffff - b 2f -3: mov r0, #0x80 - lsl r0, r0, #24 @ 0x80000000 -2: - .endif - push {r0, r1, r2} - ldr r0, 4f - adr r1, 4f - add r0, r1 - str r0, [sp, #8] - @ We know we are not on armv4t, so pop pc is safe. - pop {r0, r1, pc} - .align 2 -4: - .word __aeabi_idiv0 - 4b + bl SYM(__aeabi_idiv0) + pop {r1, pc} + #elif defined(__thumb2__) .syntax unified .ifc \signed, unsigned @@ -927,7 +905,158 @@ LSYM(Lover7): add dividend, work .endif LSYM(Lgot_result): -.endm +.endm + +#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__) +.macro BranchToDiv n, label + lsr curbit, dividend, \n + cmp curbit, divisor + blo \label +.endm + +.macro DoDiv n + lsr curbit, dividend, \n + cmp curbit, divisor + bcc 1f + lsl curbit, divisor, \n + sub dividend, dividend, curbit + +1: adc result, result +.endm + +.macro THUMB1_Div_Positive + mov result, #0 + BranchToDiv #1, LSYM(Lthumb1_div1) + BranchToDiv #4, LSYM(Lthumb1_div4) + BranchToDiv #8, LSYM(Lthumb1_div8) + BranchToDiv #12, LSYM(Lthumb1_div12) + BranchToDiv #16, LSYM(Lthumb1_div16) +LSYM(Lthumb1_div_large_positive): + mov result, #0xff + lsl divisor, divisor, #8 + rev result, result + lsr curbit, dividend, #16 + cmp curbit, divisor + blo 1f + asr result, #8 + lsl divisor, divisor, #8 + beq LSYM(Ldivbyzero_waypoint) + +1: lsr curbit, dividend, #12 + cmp curbit, divisor + blo LSYM(Lthumb1_div12) + b LSYM(Lthumb1_div16) +LSYM(Lthumb1_div_loop): + lsr divisor, divisor, #8 +LSYM(Lthumb1_div16): + Dodiv #15 + Dodiv #14 + Dodiv #13 + Dodiv #12 +LSYM(Lthumb1_div12): + Dodiv #11 + Dodiv #10 + Dodiv #9 + Dodiv #8 + bcs LSYM(Lthumb1_div_loop) +LSYM(Lthumb1_div8): + Dodiv #7 + Dodiv #6 + Dodiv #5 +LSYM(Lthumb1_div5): + Dodiv #4 +LSYM(Lthumb1_div4): + Dodiv #3 +LSYM(Lthumb1_div3): + Dodiv #2 +LSYM(Lthumb1_div2): + Dodiv #1 +LSYM(Lthumb1_div1): + sub divisor, dividend, divisor + bcs 1f + cpy divisor, dividend + +1: adc result, result + cpy dividend, result + RET + +LSYM(Ldivbyzero_waypoint): + b LSYM(Ldiv0) +.endm + +.macro THUMB1_Div_Negative + lsr result, divisor, #31 + beq 1f + neg divisor, divisor + +1: asr curbit, dividend, #32 + bcc 2f + neg dividend, dividend + +2: eor curbit, result + mov result, #0 + cpy ip, curbit + BranchToDiv #4, LSYM(Lthumb1_div_negative4) + BranchToDiv #8, LSYM(Lthumb1_div_negative8) +LSYM(Lthumb1_div_large): + mov result, #0xfc + lsl divisor, divisor, #6 + rev result, result + lsr curbit, dividend, #8 + cmp curbit, divisor + blo LSYM(Lthumb1_div_negative8) + + lsl divisor, divisor, #6 + asr result, result, #6 + cmp curbit, divisor + blo LSYM(Lthumb1_div_negative8) + + lsl divisor, divisor, #6 + asr result, result, #6 + cmp curbit, divisor + blo LSYM(Lthumb1_div_negative8) + + lsl divisor, divisor, #6 + beq LSYM(Ldivbyzero_negative) + asr result, result, #6 + b LSYM(Lthumb1_div_negative8) +LSYM(Lthumb1_div_negative_loop): + lsr divisor, divisor, #6 +LSYM(Lthumb1_div_negative8): + DoDiv #7 + DoDiv #6 + DoDiv #5 + DoDiv #4 +LSYM(Lthumb1_div_negative4): + DoDiv #3 + DoDiv #2 + bcs LSYM(Lthumb1_div_negative_loop) + DoDiv #1 + sub divisor, dividend, divisor + bcs 1f + cpy divisor, dividend + +1: cpy curbit, ip + adc result, result + asr curbit, curbit, #1 + cpy dividend, result + bcc 2f + neg dividend, dividend + cmp curbit, #0 + +2: bpl 3f + neg divisor, divisor + +3: RET + +LSYM(Ldivbyzero_negative): + cpy curbit, ip + asr curbit, curbit, #1 + bcc LSYM(Ldiv0) + neg dividend, dividend +.endm +#endif /* ARM Thumb version. */ + /* ------------------------------------------------------------------------ */ /* Start of the Real Functions */ /* ------------------------------------------------------------------------ */ @@ -937,6 +1066,7 @@ LSYM(Lgot_result): FUNC_START udivsi3 FUNC_ALIAS aeabi_uidiv udivsi3 +#if defined(__OPTIMIZE_SIZE__) cmp divisor, #0 beq LSYM(Ldiv0) @@ -954,6 +1084,13 @@ LSYM(udivsi3_skip_div0_test): pop { work } RET +#else + +LSYM(udivsi3_skip_div0_test): + THUMB1_Div_Positive + +#endif + #elif defined(__ARM_ARCH_EXT_IDIV__) ARM_FUNC_START udivsi3 @@ -1066,7 +1203,7 @@ LSYM(Lover10): RET #else /* ARM version. */ - + FUNC_START umodsi3 subs r2, r1, #1 @ compare divisor with 1 @@ -1091,8 +1228,9 @@ LSYM(Lover10): #if defined(__prefer_thumb__) - FUNC_START divsi3 + FUNC_START divsi3 FUNC_ALIAS aeabi_idiv divsi3 +#if defined(__OPTIMIZE_SIZE__) cmp divisor, #0 beq LSYM(Ldiv0) @@ -1115,7 +1253,7 @@ LSYM(Lover11): blo LSYM(Lgot_result) THUMB_DIV_MOD_BODY 0 - + mov r0, result mov work, ip cmp work, #0 @@ -1124,6 +1262,20 @@ LSYM(Lover11): LSYM(Lover12): pop { work } RET +#else + +LSYM(divsi3_skip_div0_test): + cpy curbit, dividend + orr curbit, divisor + bmi LSYM(Lthumb1_div_negative) + +LSYM(Lthumb1_div_positive): + THUMB1_Div_Positive + +LSYM(Lthumb1_div_negative): + THUMB1_Div_Negative + +#endif /* __OPTIMIZE_SIZE__ */ #elif defined(__ARM_ARCH_EXT_IDIV__) @@ -1136,8 +1288,8 @@ LSYM(Lover12): RET #else /* ARM/Thumb-2 version. */ - - ARM_FUNC_START divsi3 + + ARM_FUNC_START divsi3 ARM_FUNC_ALIAS aeabi_idiv divsi3 cmp r1, #0 ===============================================
div-3.patch
Description: Binary data