Hi Vladimir,

both patches applied and pushed,

best regards
 Waldemar

Vladimir Murzin wrote,

> O0 build result in the following codegen
> 
> 00000000 <ldrex>:
>    0: b480            push    {r7}
>    2: b085            sub     sp, #20
>    4: af00            add     r7, sp, #0
>    6: 6078            str     r0, [r7, #4]
>    8: 687b            ldr     r3, [r7, #4]
>    a: e853 3f00       ldrex   r3, [r3]
>    e: 60fb            str     r3, [r7, #12]
>   10: 68fb            ldr     r3, [r7, #12]
>   12: 4618            mov     r0, r3
>   14: 3714            adds    r7, #20
>   16: 46bd            mov     sp, r7
>   18: f85d 7b04       ldr.w   r7, [sp], #4
>   1c: 4770            bx      lr
> 
> 0000001e <strex>:
>   1e: b480            push    {r7}
>   20: b085            sub     sp, #20
>   22: af00            add     r7, sp, #0
>   24: 6078            str     r0, [r7, #4]
>   26: 6039            str     r1, [r7, #0]
>   28: 687b            ldr     r3, [r7, #4]
>   2a: 683a            ldr     r2, [r7, #0]
>   2c: e842 3300       strex   r3, r3, [r2]
>   30: 60fb            str     r3, [r7, #12]
>   32: 68fb            ldr     r3, [r7, #12]
>   34: 4618            mov     r0, r3
>   36: 3714            adds    r7, #20
>   38: 46bd            mov     sp, r7
>   3a: f85d 7b04       ldr.w   r7, [sp], #4
>   3e: 4770            bx      lr
> 
> 00000040 <testandset>:
>   40: b590            push    {r4, r7, lr}
>   42: b083            sub     sp, #12
>   44: af00            add     r7, sp, #0
>   46: 6078            str     r0, [r7, #4]
>   48: 6878            ldr     r0, [r7, #4]
>   4a: f7ff fffe       bl      0 <ldrex>
>   4e: 4603            mov     r3, r0
>   50: 461c            mov     r4, r3
>   52: 6879            ldr     r1, [r7, #4]
>   54: 2001            movs    r0, #1
>   56: f7ff fffe       bl      1e <strex>
>   5a: 4603            mov     r3, r0
>   5c: 2b00            cmp     r3, #0
>   5e: d1f3            bne.n   48 <testandset+0x8>
>   60: 4623            mov     r3, r4
>   62: 4618            mov     r0, r3
>   64: 370c            adds    r7, #12
>   66: 46bd            mov     sp, r7
>   68: bd90            pop     {r4, r7, pc}
> 
> ARM ARM suggests that LoadExcl/StoreExcl loops are guaranteed to make
> forward progress only if, for any LoadExcl/StoreExcl loop within a
> single thread of execution, the software meets all of the following
> conditions:
> 
> 1 Between the Load-Exclusive and the Store-Exclusive, there are no
>   explicit memory accesses, preloads, direct or indirect System
>   register writes, address translation instructions, cache or TLB
>   maintenance instructions, exception generating instructions,
>   exception returns, or indirect branches.
> 
> ...
> 
> Obviously condition is not met for O0 builds.
> 
> O2 build (which is highly likely the most common setting) able to do
> the right thing resulting in
> 
> 00000000 <ldrex>:
>    0: e850 0f00       ldrex   r0, [r0]
>    4: 4770            bx      lr
>    6: bf00            nop
> 
> 00000008 <strex>:
>    8: e841 0000       strex   r0, r0, [r1]
>    c: 4770            bx      lr
>    e: bf00            nop
> 
> 00000010 <testandset>:
>   10: 2101            movs    r1, #1
>   12: 4603            mov     r3, r0
>   14: e853 0f00       ldrex   r0, [r3]
>   18: e843 1200       strex   r2, r1, [r3]
>   1c: 2a00            cmp     r2, #0
>   1e: d1f9            bne.n   14 <testandset+0x4>
>   20: 4770            bx      lr
>   22: bf00            nop
> 
> Rather than depending on level of optimisation implement whole
> ldrex/strex loop in inline assembly.
> 
> Signed-off-by: Vladimir Murzin <vladimir.mur...@arm.com>
> ---
>  .../linuxthreads/sysdeps/arm/pt-machine.h     | 34 +++++--------------
>  1 file changed, 9 insertions(+), 25 deletions(-)
> 
> diff --git a/libpthread/linuxthreads/sysdeps/arm/pt-machine.h 
> b/libpthread/linuxthreads/sysdeps/arm/pt-machine.h
> index fc17e9bc7..b00b10495 100644
> --- a/libpthread/linuxthreads/sysdeps/arm/pt-machine.h
> +++ b/libpthread/linuxthreads/sysdeps/arm/pt-machine.h
> @@ -29,35 +29,19 @@
>  #endif
>  
>  #if defined(__thumb2__)
> -PT_EI long int ldrex(int *spinlock)
> -{
> -     long int ret;
> -     __asm__ __volatile__(
> -             "ldrex %0, [%1]\n"
> -             : "=r"(ret)
> -             : "r"(spinlock) : "memory");
> -     return ret;
> -}
> -
> -PT_EI long int strex(int val, int *spinlock)
> -{
> -     long int ret;
> -     __asm__ __volatile__(
> -             "strex %0, %1, [%2]\n"
> -             : "=r"(ret)
> -             : "r" (val), "r"(spinlock) : "memory");
> -     return ret;
> -}
> -
>  /* Spinlock implementation; required.  */
>  PT_EI long int
>  testandset (int *spinlock)
>  {
> -  register unsigned int ret;
> -
> -  do {
> -       ret = ldrex(spinlock);
> -  } while (strex(1, spinlock));
> +  unsigned int ret, tmp, val = 1;
> +
> +  __asm__ __volatile__ (
> +"0:    ldrex   %0, [%2]       \n"
> +"      strex   %1, %3, [%2]   \n"
> +"      cmp     %1, #0         \n"
> +"      bne     0b"
> +       : "=&r" (ret), "=&r" (tmp)
> +       : "r" (spinlock), "r" (val) : "memory", "cc");
>  
>    return ret;
>  }
> -- 
> 2.25.1
> 
_______________________________________________
devel mailing list -- devel@uclibc-ng.org
To unsubscribe send an email to devel-le...@uclibc-ng.org

Reply via email to