https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116415
--- Comment #4 from Peter Bergner <bergner at gcc dot gnu.org> ---
Here's a C testcase that shows the same problem:

bergner@ltcden2-lp1:BUG$ cat bug.c
#include <stdio.h>
#include <stdint.h>

typedef union {
  struct {
    uint64_t a;
    uint64_t b;
  } t;
  __uint128_t raw_data;
} Value;

Value value;

static inline void
foo (const uint64_t delta1, const uint64_t delta2)
{
  Value cur;
  cur.raw_data = value.raw_data;
  for (;;)
    {
      Value next;
      next.t.a = cur.t.a+delta1;
      next.t.b = cur.t.b+delta2;
      if (__atomic_compare_exchange(&value.raw_data, &cur.raw_data, &next.raw_data, 0, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE))
        break;
    }
}

int
main (void)
{
  foo (1, 2);
  printf ("%lu %lu\n", value.t.a, value.t.b);
  return 0;
}

bergner@ltcden2-lp1:BUG$ gcc -O2 -mcpu=power8 -mno-optimize-swaps bug.c
bergner@ltcden2-lp1:BUG$ ./a.out
1 2
bergner@ltcden2-lp1:BUG$ gcc -O2 -mcpu=power8 -moptimize-swaps bug.c
bergner@ltcden2-lp1:BUG$ ./a.out
2 1