On Fri, Jul 01, 2016 at 10:04:50 -0700, Richard Henderson wrote: (snip) > [rth: Rearrange 128-bit cmpxchg helper. Enforce alignment on LL.] > > Signed-off-by: Emilio G. Cota <c...@braap.org> > Message-Id: <1467054136-10430-28-git-send-email-c...@braap.org> > Signed-off-by: Richard Henderson <r...@twiddle.net> > --- > target-arm/helper-a64.c | 135 > +++++++++++++++++++++++++++++++++++++++++++++ > target-arm/helper-a64.h | 2 + > target-arm/translate-a64.c | 106 ++++++++++++++++------------------- > 3 files changed, 185 insertions(+), 58 deletions(-) > > diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c > index 41e48a4..d98d781 100644 > --- a/target-arm/helper-a64.c > +++ b/target-arm/helper-a64.c > @@ -27,6 +27,10 @@ > #include "qemu/bitops.h" > #include "internals.h" > #include "qemu/crc32c.h" > +#include "exec/exec-all.h" > +#include "exec/cpu_ldst.h" > +#include "qemu/int128.h" > +#include "tcg.h" > #include <zlib.h> /* For crc32 */ > > /* C2.4.7 Multiply and divide */ > @@ -444,3 +448,134 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, > uint32_t bytes) > /* Linux crc32c converts the output to one's complement. */ > return crc32c(acc, buf, bytes) ^ 0xffffffff; > } > + > +/* Returns 0 on success; 1 otherwise. */ > +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, > + uint64_t new_lo, uint64_t new_hi) > +{ > +#ifndef CONFIG_USER_ONLY > + uintptr_t ra = GETPC(); > +#endif
This ifdef breaks the compilation for user-mode, where we need the retaddr when calling cpu_loop_exit_atomic. A possible fix would be: -#ifndef CONFIG_USER_ONLY +#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_ATOMIC128) > + Int128 oldv, cmpv, newv; > + bool success; > + > + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); > + newv = int128_make128(new_lo, new_hi); > + > + if (parallel_cpus) { > +#ifndef CONFIG_ATOMIC128 > + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); > +#elif defined(CONFIG_USER_ONLY) > + Int128 *haddr = g2h(addr); > +#ifdef HOST_WORDS_BIGENDIAN > + cmpv = bswap128(cmpv); > + newv = bswap128(newv); > +#endif > + success = __atomic_compare_exchange_16(haddr, &cmpv, newv, false, > + __ATOMIC_SEQ_CST, > + __ATOMIC_SEQ_CST); > +#else > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); > + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); > + success = int128_eq(oldv, cmpv); > +#endif > + } else { > + uint64_t o0, o1; > + > +#ifdef CONFIG_USER_ONLY > + /* ??? Enforce alignment. */ > + uint64_t *haddr = g2h(addr); > + o0 = ldq_le_p(haddr + 0); > + o1 = ldq_le_p(haddr + 1); > + oldv = int128_make128(o0, o1); > + > + success = int128_eq(oldv, cmpv); > + if (success) { > + stq_le_p(haddr + 0, int128_getlo(newv)); > + stq_le_p(haddr + 8, int128_gethi(newv)); haddr+1, as in ldq_le_p? 
> + } > +#else > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); > + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); > + > + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); > + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); > + oldv = int128_make128(o0, o1); > + > + success = int128_eq(oldv, cmpv); > + if (success) { > + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); > + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); > + } > +#endif > + } > + > + return !success; > +} > + > +uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, > + uint64_t new_lo, uint64_t new_hi) > +{ > +#ifndef CONFIG_USER_ONLY > + uintptr_t ra = GETPC(); > +#endif Ditto. > + Int128 oldv, cmpv, newv; > + bool success; > + > + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); > + newv = int128_make128(new_lo, new_hi); > + > + if (parallel_cpus) { > +#ifndef CONFIG_ATOMIC128 > + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); > +#elif defined(CONFIG_USER_ONLY) > + Int128 *haddr = g2h(addr); > +#ifndef HOST_WORDS_BIGENDIAN > + cmpv = bswap128(cmpv); > + newv = bswap128(newv); > +#endif > + success = __atomic_compare_exchange_16(haddr, &cmpv, newv, false, > + __ATOMIC_SEQ_CST, > + __ATOMIC_SEQ_CST); > +#else > + int mem_idx = cpu_mmu_index(env, false); > + TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); > + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); > + success = int128_eq(oldv, cmpv); > +#endif > + } else { > + uint64_t o0, o1; > + > +#ifdef CONFIG_USER_ONLY > + /* ??? Enforce alignment. */ > + uint64_t *haddr = g2h(addr); > + o1 = ldq_be_p(haddr + 0); > + o0 = ldq_be_p(haddr + 1); > + oldv = int128_make128(o0, o1); > + > + success = int128_eq(oldv, cmpv); > + if (success) { > + stq_be_p(haddr + 0, int128_gethi(newv)); > + stq_be_p(haddr + 8, int128_getlo(newv)); Ditto. 
BTW I tested Concurrency Kit's (ck's) ck_pr tests for x86_64-linux-user and aarch64-linux-user on an aarch64 host, with parallel_cpus={0,1}. They work, but note that ck doesn't generate paired (128-bit) ops, so those code paths remain untested. Thanks, E.