On 07/07/15 16:23, Tejas Belagod wrote:
> Ping!
> 

I've had a look at this (sorry for the delay).  I think it's mostly OK,
but I have two comments to make.

1) It's quite hard to understand the algorithm and there are no comments
to aid understanding (to be fair, there aren't many comments on the
other algorithms either).

2) It looks as though the new code calculates both the division and the
modulus simultaneously.  As such, it means that the the divmod code
should be simplified to share the same code as the division function
itself (saving a few bytes, and more importantly several cycles when
modulus is required).

Can you please run some tests to validate 2) above?  and if correct
adjust the code to handle this case.  I think that will go some way to
mitigating the code size increase from the new implementation.

R.

> On 30/04/15 10:40, Hale Wang wrote:
>>> -----Original Message-----
>>> From: Hale Wang [mailto:hale.w...@arm.com]
>>> Sent: Monday, February 09, 2015 9:54 AM
>>> To: Richard Earnshaw
>>> Cc: Hale Wang; gcc-patches; Matthew Gretton-Dann
>>> Subject: RE: [Ping^2] [PATCH, ARM, libgcc] New aeabi_idiv function for
>>> armv6-m
>>>
>>> Ping https://gcc.gnu.org/ml/gcc-patches/2014-12/msg01059.html.
>>>
>>
>> Ping for trunk. Is it ok for trunk now?
>>
>> Thanks,
>> Hale
>>>> -----Original Message-----
>>>> From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches-
>>>> ow...@gcc.gnu.org] On Behalf Of Hale Wang
>>>> Sent: Friday, December 12, 2014 9:36 AM
>>>> To: gcc-patches
>>>> Subject: RE: [Ping] [PATCH, ARM, libgcc] New aeabi_idiv function for
>>>> armv6- m
>>>>
>>>> Ping? Already applied to arm/embedded-4_9-branch, is it OK for trunk?
>>>>
>>>> -Hale
>>>>
>>>>> -----Original Message-----
>>>>> From: Joey Ye [mailto:joey.ye...@gmail.com]
>>>>> Sent: Thursday, November 27, 2014 10:01 AM
>>>>> To: Hale Wang
>>>>> Cc: gcc-patches
>>>>> Subject: Re: [PATCH, ARM, libgcc] New aeabi_idiv function for
>>>>> armv6-m
>>>>>
>>>>> OK applying to arm/embedded-4_9-branch, though you still need
>>>>> maintainer approval into trunk.
>>>>>
>>>>> - Joey
>>>>>
>>>>> On Wed, Nov 26, 2014 at 11:43 AM, Hale Wang <hale.w...@arm.com>
>>>> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
>>>>>> (https://git.linaro.org/toolchain/cortex-strings.git), which was
>>>>>> contributed by ARM under Free BSD license.
>>>>>>
>>>>>> The new aeabi_idiv routine is used to replace the one in
>>>>>> libgcc/config/arm/lib1funcs.S. This replacement happens within the
>>>>>> Thumb1 wrapper. The new routine is under LGPLv3 license.
>>>>>>
>>>>>> The main advantage of this version is that it can improve the
>>>>>> performance of the aeabi_idiv function for Thumb1. This solution
>>>>>> will also increase the code size. So it will only be used if
>>>>>> __OPTIMIZE_SIZE__ is
>>>>> not defined.
>>>>>>
>>>>>> Make check passed for armv6-m.
>>>>>>
>>>>>> OK for trunk?
>>>>>>
>>>>>> Thanks,
>>>>>> Hale Wang
>>>>>>
>>>>>> libgcc/ChangeLog:
>>>>>>
>>>>>> 2014-11-26  Hale Wang  <hale.w...@arm.com>
>>>>>>
>>>>>>          * config/arm/lib1funcs.S: Add new wrapper.
>>>>>>
>>>>>> ===============================================
>>>>>> diff --git a/libgcc/config/arm/lib1funcs.S
>>>>>> b/libgcc/config/arm/lib1funcs.S index b617137..de66c81 100644
>>>>>> --- a/libgcc/config/arm/lib1funcs.S
>>>>>> +++ b/libgcc/config/arm/lib1funcs.S
>>>>>> @@ -306,34 +306,12 @@ LSYM(Lend_fde):
>>>>>>   #ifdef __ARM_EABI__
>>>>>>   .macro THUMB_LDIV0 name signed
>>>>>>   #if defined(__ARM_ARCH_6M__)
>>>>>> -       .ifc \signed, unsigned
>>>>>> -       cmp     r0, #0
>>>>>> -       beq     1f
>>>>>> -       mov     r0, #0
>>>>>> -       mvn     r0, r0          @ 0xffffffff
>>>>>> -1:
>>>>>> -       .else
>>>>>> -       cmp     r0, #0
>>>>>> -       beq     2f
>>>>>> -       blt     3f
>>>>>> +
>>>>>> +       push    {r0, lr}
>>>>>>          mov     r0, #0
>>>>>> -       mvn     r0, r0
>>>>>> -       lsr     r0, r0, #1      @ 0x7fffffff
>>>>>> -       b       2f
>>>>>> -3:     mov     r0, #0x80
>>>>>> -       lsl     r0, r0, #24     @ 0x80000000
>>>>>> -2:
>>>>>> -       .endif
>>>>>> -       push    {r0, r1, r2}
>>>>>> -       ldr     r0, 4f
>>>>>> -       adr     r1, 4f
>>>>>> -       add     r0, r1
>>>>>> -       str     r0, [sp, #8]
>>>>>> -       @ We know we are not on armv4t, so pop pc is safe.
>>>>>> -       pop     {r0, r1, pc}
>>>>>> -       .align  2
>>>>>> -4:
>>>>>> -       .word   __aeabi_idiv0 - 4b
>>>>>> +       bl      SYM(__aeabi_idiv0)
>>>>>> +       pop     {r1, pc}
>>>>>> +
>>>>>>   #elif defined(__thumb2__)
>>>>>>          .syntax unified
>>>>>>          .ifc \signed, unsigned
>>>>>> @@ -927,7 +905,158 @@ LSYM(Lover7):
>>>>>>          add     dividend, work
>>>>>>     .endif
>>>>>>   LSYM(Lgot_result):
>>>>>> -.endm
>>>>>> +.endm
>>>>>> +
>>>>>> +#if defined(__prefer_thumb__)
>>>>> && !defined(__OPTIMIZE_SIZE__) .macro
>>>>>> +BranchToDiv n, label
>>>>>> +       lsr     curbit, dividend, \n
>>>>>> +       cmp     curbit, divisor
>>>>>> +       blo     \label
>>>>>> +.endm
>>>>>> +
>>>>>> +.macro DoDiv n
>>>>>> +       lsr     curbit, dividend, \n
>>>>>> +       cmp     curbit, divisor
>>>>>> +       bcc     1f
>>>>>> +       lsl     curbit, divisor, \n
>>>>>> +       sub     dividend, dividend, curbit
>>>>>> +
>>>>>> +1:     adc     result, result
>>>>>> +.endm
>>>>>> +
>>>>>> +.macro THUMB1_Div_Positive
>>>>>> +       mov     result, #0
>>>>>> +       BranchToDiv #1, LSYM(Lthumb1_div1)
>>>>>> +       BranchToDiv #4, LSYM(Lthumb1_div4)
>>>>>> +       BranchToDiv #8, LSYM(Lthumb1_div8)
>>>>>> +       BranchToDiv #12, LSYM(Lthumb1_div12)
>>>>>> +       BranchToDiv #16, LSYM(Lthumb1_div16)
>>>>>> +LSYM(Lthumb1_div_large_positive):
>>>>>> +       mov     result, #0xff
>>>>>> +       lsl     divisor, divisor, #8
>>>>>> +       rev     result, result
>>>>>> +       lsr     curbit, dividend, #16
>>>>>> +       cmp     curbit, divisor
>>>>>> +       blo     1f
>>>>>> +       asr     result, #8
>>>>>> +       lsl     divisor, divisor, #8
>>>>>> +       beq     LSYM(Ldivbyzero_waypoint)
>>>>>> +
>>>>>> +1:     lsr     curbit, dividend, #12
>>>>>> +       cmp     curbit, divisor
>>>>>> +       blo     LSYM(Lthumb1_div12)
>>>>>> +       b       LSYM(Lthumb1_div16)
>>>>>> +LSYM(Lthumb1_div_loop):
>>>>>> +       lsr     divisor, divisor, #8
>>>>>> +LSYM(Lthumb1_div16):
>>>>>> +       Dodiv   #15
>>>>>> +       Dodiv   #14
>>>>>> +       Dodiv   #13
>>>>>> +       Dodiv   #12
>>>>>> +LSYM(Lthumb1_div12):
>>>>>> +       Dodiv   #11
>>>>>> +       Dodiv   #10
>>>>>> +       Dodiv   #9
>>>>>> +       Dodiv   #8
>>>>>> +       bcs     LSYM(Lthumb1_div_loop)
>>>>>> +LSYM(Lthumb1_div8):
>>>>>> +       Dodiv   #7
>>>>>> +       Dodiv   #6
>>>>>> +       Dodiv   #5
>>>>>> +LSYM(Lthumb1_div5):
>>>>>> +       Dodiv   #4
>>>>>> +LSYM(Lthumb1_div4):
>>>>>> +       Dodiv   #3
>>>>>> +LSYM(Lthumb1_div3):
>>>>>> +       Dodiv   #2
>>>>>> +LSYM(Lthumb1_div2):
>>>>>> +       Dodiv   #1
>>>>>> +LSYM(Lthumb1_div1):
>>>>>> +       sub     divisor, dividend, divisor
>>>>>> +       bcs     1f
>>>>>> +       cpy     divisor, dividend
>>>>>> +
>>>>>> +1:     adc     result, result
>>>>>> +       cpy     dividend, result
>>>>>> +       RET
>>>>>> +
>>>>>> +LSYM(Ldivbyzero_waypoint):
>>>>>> +       b       LSYM(Ldiv0)
>>>>>> +.endm
>>>>>> +
>>>>>> +.macro THUMB1_Div_Negative
>>>>>> +       lsr     result, divisor, #31
>>>>>> +       beq     1f
>>>>>> +       neg     divisor, divisor
>>>>>> +
>>>>>> +1:     asr     curbit, dividend, #32
>>>>>> +       bcc     2f
>>>>>> +       neg     dividend, dividend
>>>>>> +
>>>>>> +2:     eor     curbit, result
>>>>>> +       mov     result, #0
>>>>>> +       cpy     ip, curbit
>>>>>> +       BranchToDiv #4, LSYM(Lthumb1_div_negative4)
>>>>>> +       BranchToDiv #8, LSYM(Lthumb1_div_negative8)
>>>>>> +LSYM(Lthumb1_div_large):
>>>>>> +       mov     result, #0xfc
>>>>>> +       lsl     divisor, divisor, #6
>>>>>> +       rev     result, result
>>>>>> +       lsr     curbit, dividend, #8
>>>>>> +       cmp     curbit, divisor
>>>>>> +       blo     LSYM(Lthumb1_div_negative8)
>>>>>> +
>>>>>> +       lsl     divisor, divisor, #6
>>>>>> +       asr     result, result, #6
>>>>>> +       cmp     curbit, divisor
>>>>>> +       blo     LSYM(Lthumb1_div_negative8)
>>>>>> +
>>>>>> +       lsl     divisor, divisor, #6
>>>>>> +       asr     result, result, #6
>>>>>> +       cmp     curbit, divisor
>>>>>> +       blo     LSYM(Lthumb1_div_negative8)
>>>>>> +
>>>>>> +       lsl     divisor, divisor, #6
>>>>>> +       beq     LSYM(Ldivbyzero_negative)
>>>>>> +       asr     result, result, #6
>>>>>> +       b       LSYM(Lthumb1_div_negative8)
>>>>>> +LSYM(Lthumb1_div_negative_loop):
>>>>>> +       lsr     divisor, divisor, #6
>>>>>> +LSYM(Lthumb1_div_negative8):
>>>>>> +       DoDiv   #7
>>>>>> +       DoDiv   #6
>>>>>> +       DoDiv   #5
>>>>>> +       DoDiv   #4
>>>>>> +LSYM(Lthumb1_div_negative4):
>>>>>> +       DoDiv   #3
>>>>>> +       DoDiv   #2
>>>>>> +       bcs     LSYM(Lthumb1_div_negative_loop)
>>>>>> +       DoDiv   #1
>>>>>> +       sub     divisor, dividend, divisor
>>>>>> +       bcs     1f
>>>>>> +       cpy     divisor, dividend
>>>>>> +
>>>>>> +1:     cpy     curbit, ip
>>>>>> +       adc     result, result
>>>>>> +       asr     curbit, curbit, #1
>>>>>> +       cpy     dividend, result
>>>>>> +       bcc     2f
>>>>>> +       neg     dividend, dividend
>>>>>> +       cmp     curbit, #0
>>>>>> +
>>>>>> +2:     bpl     3f
>>>>>> +       neg     divisor, divisor
>>>>>> +
>>>>>> +3:     RET
>>>>>> +
>>>>>> +LSYM(Ldivbyzero_negative):
>>>>>> +       cpy     curbit, ip
>>>>>> +       asr     curbit, curbit, #1
>>>>>> +       bcc     LSYM(Ldiv0)
>>>>>> +       neg     dividend, dividend
>>>>>> +.endm
>>>>>> +#endif /* ARM Thumb version.  */
>>>>>> +
>>>>>>   /*
>>>>>> ------------------------------------------------------------------
>>>>>> -- 
>>>>>> -- 
>>>>>> -- 
>>>>>> */
>>>>>>   /*             Start of the Real Functions
>>>>>> */
>>>>>>   /*
>>>>>> ------------------------------------------------------------------
>>>>>> -- 
>>>>>> -- 
>>>>>> -- 
>>>>>> */
>>>>>> @@ -937,6 +1066,7 @@ LSYM(Lgot_result):
>>>>>>
>>>>>>          FUNC_START udivsi3
>>>>>>          FUNC_ALIAS aeabi_uidiv udivsi3
>>>>>> +#if defined(__OPTIMIZE_SIZE__)
>>>>>>
>>>>>>          cmp     divisor, #0
>>>>>>          beq     LSYM(Ldiv0)
>>>>>> @@ -954,6 +1084,13 @@ LSYM(udivsi3_skip_div0_test):
>>>>>>          pop     { work }
>>>>>>          RET
>>>>>>
>>>>>> +#else
>>>>>> +
>>>>>> +LSYM(udivsi3_skip_div0_test):
>>>>>> +       THUMB1_Div_Positive
>>>>>> +
>>>>>> +#endif
>>>>>> +
>>>>>>   #elif defined(__ARM_ARCH_EXT_IDIV__)
>>>>>>
>>>>>>          ARM_FUNC_START udivsi3
>>>>>> @@ -1066,7 +1203,7 @@ LSYM(Lover10):
>>>>>>          RET
>>>>>>
>>>>>>   #else  /* ARM version.  */
>>>>>> -
>>>>>> +
>>>>>>          FUNC_START umodsi3
>>>>>>
>>>>>>          subs    r2, r1, #1                      @ compare divisor
>>>>>> with 1
>>>>>> @@ -1091,8 +1228,9 @@ LSYM(Lover10):
>>>>>>
>>>>>>   #if defined(__prefer_thumb__)
>>>>>>
>>>>>> -       FUNC_START divsi3
>>>>>> +       FUNC_START divsi3
>>>>>>          FUNC_ALIAS aeabi_idiv divsi3
>>>>>> +#if defined(__OPTIMIZE_SIZE__)
>>>>>>
>>>>>>          cmp     divisor, #0
>>>>>>          beq     LSYM(Ldiv0)
>>>>>> @@ -1115,7 +1253,7 @@ LSYM(Lover11):
>>>>>>          blo     LSYM(Lgot_result)
>>>>>>
>>>>>>          THUMB_DIV_MOD_BODY 0
>>>>>> -
>>>>>> +
>>>>>>          mov     r0, result
>>>>>>          mov     work, ip
>>>>>>          cmp     work, #0
>>>>>> @@ -1124,6 +1262,20 @@ LSYM(Lover11):
>>>>>>   LSYM(Lover12):
>>>>>>          pop     { work }
>>>>>>          RET
>>>>>> +#else
>>>>>> +
>>>>>> +LSYM(divsi3_skip_div0_test):
>>>>>> +       cpy     curbit, dividend
>>>>>> +       orr     curbit, divisor
>>>>>> +       bmi     LSYM(Lthumb1_div_negative)
>>>>>> +
>>>>>> +LSYM(Lthumb1_div_positive):
>>>>>> +       THUMB1_Div_Positive
>>>>>> +
>>>>>> +LSYM(Lthumb1_div_negative):
>>>>>> +       THUMB1_Div_Negative
>>>>>> +
>>>>>> +#endif /* __OPTIMIZE_SIZE__ */
>>>>>>
>>>>>>   #elif defined(__ARM_ARCH_EXT_IDIV__)
>>>>>>
>>>>>> @@ -1136,8 +1288,8 @@ LSYM(Lover12):
>>>>>>          RET
>>>>>>
>>>>>>   #else /* ARM/Thumb-2 version.  */
>>>>>> -
>>>>>> -       ARM_FUNC_START divsi3
>>>>>> +
>>>>>> +       ARM_FUNC_START divsi3
>>>>>>          ARM_FUNC_ALIAS aeabi_idiv divsi3
>>>>>>
>>>>>>          cmp     r1, #0
>>>>>> ===============================================
>>>>
>>>>
>>>>
>>>>
>>
>>
>>
> 

Reply via email to