On 05/17/2016 03:12 PM, Richard Henderson wrote:
> On 05/17/2016 01:01 PM, Sergey Fedorov wrote:
>>> Sorry, I can't see reading ARMv6 ARM that 1-byte access can't be atomic.
>>> What
>>> I've found:
>>>
>>> B2.4.1 Normal memory attribute
>>> (snip)
>>> Shared Normal memory
>>>
>>> (snip)
>>> ... Reads to Shared Normal Memory that are aligned in memory to the
>>> size of the access must be atomic.
> ...
>> Looks like GCC has no trouble generating __atomic_store_n() for 1-byte
>> bool...
>
> Not loads and stores, but other atomic ops like xchg. The native atomic
> operations are all 4 bytes long.
>
> I suppose the compiler may well be able to synthesize sub-word atomic ops, but
> it'll be 2 or 3 times the size of a word-sized atomic op, and for no good
> reason.
Indeed, even with the gcc 7 branch:

struct foo {
  _Bool b;
  int i;
} f;

void a()
{
  __atomic_exchange_n(&f.b, 1, __ATOMIC_ACQUIRE);
  __atomic_exchange_n(&f.i, 1, __ATOMIC_ACQUIRE);
}

void b()
{
  __sync_lock_test_and_set(&f.b, 1);
  __sync_lock_test_and_set(&f.i, 1);
}

$ ./gcc/xgcc -B./gcc/ -O2 -S ~/z.c -march=armv6
$ cat z.s
a:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {r4, r5, r6, lr}
        mov     r6, #1
        ldr     r5, .L8
        ldrb    r3, [r5]        @ zero_extendqisi2
.L2:
        mov     r2, r6
        sxtb    r1, r3
        mov     r0, r5
        mov     r4, r3
        bl      __sync_val_compare_and_swap_1
        uxtb    r4, r4
        uxtb    r2, r0
        cmp     r2, r4
        mov     r3, r0
        bne     .L2
        ldr     r3, .L8+4
        mov     r2, #1
.L5:
        ldrex   r1, [r3]
        strex   r0, r2, [r3]
        cmp     r0, #0
        bne     .L5
        mcr     p15, 0, r0, c7, c10, 5
        pop     {r4, r5, r6, pc}
...
b:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {r4, lr}
        mov     r1, #1
        ldr     r4, .L13
        mov     r0, r4
        bl      __sync_lock_test_and_set_1
        add     r4, r4, #4
        mov     r3, #1
.L11:
        ldrex   r2, [r4]
        strex   r1, r3, [r4]
        cmp     r1, #0
        bne     .L11
        mcr     p15, 0, r0, c7, c10, 5
        pop     {r4, pc}


r~