Force the use of cmpxchg16b on x86_64. Wikipedia suggests that only very old AMD64 (circa 2004) did not have this instruction. Further, it's required by Windows 8 so no new cpus will ever omit it.
If we truly care about these, then we could check this at startup time and then avoid executing paths that use it. Signed-off-by: Richard Henderson <r...@twiddle.net> --- configure | 29 ++++++++++++- cputlb.c | 6 +++ include/qemu/int128.h | 6 +++ softmmu_template.h | 110 +++++++++++++++++++++++++++++++++++++------------- tcg/tcg.h | 22 ++++++++++ 5 files changed, 144 insertions(+), 29 deletions(-) diff --git a/configure b/configure index 59ea124..586abd6 100755 --- a/configure +++ b/configure @@ -1201,7 +1201,10 @@ case "$cpu" in cc_i386='$(CC) -m32' ;; x86_64) - CPU_CFLAGS="-m64" + # ??? Only extremely old AMD cpus do not have cmpxchg16b. + # If we truly care, we should simply detect this case at + # runtime and generate the fallback to serial emulation. + CPU_CFLAGS="-m64 -mcx16" LDFLAGS="-m64 $LDFLAGS" cc_i386='$(CC) -m32' ;; @@ -4434,6 +4437,26 @@ if compile_prog "" "" ; then int128=yes fi +######################################### +# See if 128-bit atomic operations are supported. + +atomic128=no +if test "$int128" = "yes"; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + y = __atomic_load_16(&x, 0); + __atomic_store_16(&x, y, 0); + __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0); + return 0; +} +EOF + if compile_prog "" "" ; then + atomic128=yes + fi +fi + ######################################## # check if getauxval is available. 
@@ -5383,6 +5406,10 @@ if test "$int128" = "yes" ; then echo "CONFIG_INT128=y" >> $config_host_mak fi +if test "$atomic128" = "yes" ; then + echo "CONFIG_ATOMIC128=y" >> $config_host_mak +fi + if test "$getauxval" = "yes" ; then echo "CONFIG_GETAUXVAL=y" >> $config_host_mak fi diff --git a/cputlb.c b/cputlb.c index 5272456..660f824 100644 --- a/cputlb.c +++ b/cputlb.c @@ -510,6 +510,12 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) #define SHIFT 3 #include "softmmu_template.h" + +#ifdef CONFIG_ATOMIC128 +#define SHIFT 4 +#include "softmmu_template.h" +#endif + #undef MMUSUFFIX #define MMUSUFFIX _cmmu diff --git a/include/qemu/int128.h b/include/qemu/int128.h index ab67275..5819da4 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -2,6 +2,7 @@ #define INT128_H #ifdef CONFIG_INT128 +#include "qemu/bswap.h" typedef __int128 Int128; @@ -137,6 +138,11 @@ static inline void int128_subfrom(Int128 *a, Int128 b) *a -= b; } +static inline Int128 bswap128(Int128 a) +{ + return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a))); +} + #else /* !CONFIG_INT128 */ /* Here we are catering to the ABI of the host. 
If the host returns diff --git a/softmmu_template.h b/softmmu_template.h index 76712b9..0a9f49b 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -27,25 +27,30 @@ #define DATA_SIZE (1 << SHIFT) -#if DATA_SIZE == 8 -#define SUFFIX q -#define LSUFFIX q -#define SDATA_TYPE int64_t +#if DATA_SIZE == 16 +#define SUFFIX o +#define LSUFFIX o +#define SDATA_TYPE Int128 +#define DATA_TYPE Int128 +#elif DATA_SIZE == 8 +#define SUFFIX q +#define LSUFFIX q +#define SDATA_TYPE int64_t #define DATA_TYPE uint64_t #elif DATA_SIZE == 4 -#define SUFFIX l -#define LSUFFIX l -#define SDATA_TYPE int32_t +#define SUFFIX l +#define LSUFFIX l +#define SDATA_TYPE int32_t #define DATA_TYPE uint32_t #elif DATA_SIZE == 2 -#define SUFFIX w -#define LSUFFIX uw -#define SDATA_TYPE int16_t +#define SUFFIX w +#define LSUFFIX uw +#define SDATA_TYPE int16_t #define DATA_TYPE uint16_t #elif DATA_SIZE == 1 -#define SUFFIX b -#define LSUFFIX ub -#define SDATA_TYPE int8_t +#define SUFFIX b +#define LSUFFIX ub +#define SDATA_TYPE int8_t #define DATA_TYPE uint8_t #else #error unsupported data size @@ -56,7 +61,7 @@ to the register size of the host. This is tcg_target_long, except in the case of a 32-bit host and 64-bit data, and for that we always have uint64_t. Don't bother with this widened value for SOFTMMU_CODE_ACCESS. 
*/ -#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE == 8 +#if defined(SOFTMMU_CODE_ACCESS) || DATA_SIZE >= 8 # define WORD_TYPE DATA_TYPE # define USUFFIX SUFFIX #else @@ -73,7 +78,9 @@ #define ADDR_READ addr_read #endif -#if DATA_SIZE == 8 +#if DATA_SIZE == 16 +# define BSWAP(X) bswap128(X) +#elif DATA_SIZE == 8 # define BSWAP(X) bswap64(X) #elif DATA_SIZE == 4 # define BSWAP(X) bswap32(X) @@ -140,6 +147,7 @@ vidx >= 0; \ }) +#if DATA_SIZE < 16 #ifndef SOFTMMU_CODE_ACCESS static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, CPUIOTLBEntry *iotlbentry, @@ -307,9 +315,10 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, return res; } #endif /* DATA_SIZE > 1 */ +#endif /* DATA_SIZE < 16 */ #ifndef SOFTMMU_CODE_ACCESS - +#if DATA_SIZE < 16 /* Provide signed versions of the load routines as well. We can of course avoid this for 64-bit data, or for 32-bit data on 32-bit host. */ #if DATA_SIZE * 8 < TCG_TARGET_REG_BITS @@ -507,6 +516,7 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, } } #endif +#endif /* DATA_SIZE < 16 */ #if DATA_SIZE == 1 # define HE_SUFFIX _mmu @@ -573,9 +583,30 @@ DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX) TCGMemOpIdx oi, uintptr_t retaddr) { ATOMIC_MMU_BODY; +#if DATA_SIZE < 16 return atomic_cmpxchg(haddr, cmpv, newv); +#else + __atomic_compare_exchange(haddr, &cmpv, &newv, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return cmpv; +#endif } +#if DATA_SIZE > 1 +DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX) + (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + DATA_TYPE retv; + cmpv = BSWAP(cmpv); + newv = BSWAP(newv); + retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX) + (env, addr, cmpv, newv, oi, retaddr)); + return BSWAP(retv); +} +#endif + +#if DATA_SIZE < 16 #define GEN_ATOMIC_HELPER(NAME) \ DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), HE_SUFFIX) \ (CPUArchState *env, 
target_ulong addr, DATA_TYPE val, \ @@ -600,18 +631,6 @@ GEN_ATOMIC_HELPER(xchg) #undef GEN_ATOMIC_HELPER #if DATA_SIZE > 1 -DATA_TYPE glue(glue(helper_atomic_cmpxchg, SUFFIX), RE_SUFFIX) - (CPUArchState *env, target_ulong addr, DATA_TYPE cmpv, DATA_TYPE newv, - TCGMemOpIdx oi, uintptr_t retaddr) -{ - DATA_TYPE retv; - cmpv = BSWAP(cmpv); - newv = BSWAP(newv); - retv = (glue(glue(helper_atomic_cmpxchg, SUFFIX), HE_SUFFIX) - (env, addr, cmpv, newv, oi, retaddr)); - return BSWAP(retv); -} - #define GEN_ATOMIC_HELPER(NAME) \ DATA_TYPE glue(glue(glue(helper_atomic_, NAME), SUFFIX), RE_SUFFIX) \ (CPUArchState *env, target_ulong addr, DATA_TYPE val, \ @@ -676,6 +695,41 @@ DATA_TYPE glue(glue(helper_atomic_add_fetch, SUFFIX), RE_SUFFIX) } } #endif /* DATA_SIZE > 1 */ +#else /* DATA_SIZE >= 16 */ +DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX) + (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) +{ + DATA_TYPE res; + ATOMIC_MMU_BODY; + __atomic_load(haddr, &res, __ATOMIC_RELAXED); + return res; +} + +DATA_TYPE glue(glue(helper_atomic_ld, SUFFIX), RE_SUFFIX) + (CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr) +{ + DATA_TYPE res; + res = (glue(glue(helper_atomic_ld, SUFFIX), HE_SUFFIX) + (env, addr, oi, retaddr)); + return BSWAP(res); +} + +void glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX) + (CPUArchState *env, target_ulong addr, DATA_TYPE val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + ATOMIC_MMU_BODY; + __atomic_store(haddr, &val, __ATOMIC_RELAXED); +} + +void glue(glue(helper_atomic_st, SUFFIX), RE_SUFFIX) + (CPUArchState *env, target_ulong addr, DATA_TYPE val, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + (glue(glue(helper_atomic_st, SUFFIX), HE_SUFFIX) + (env, addr, BSWAP(val), oi, retaddr)); +} +#endif /* DATA_SIZE < 16 */ #undef ATOMIC_MMU_BODY diff --git a/tcg/tcg.h b/tcg/tcg.h index 4e60498..1304a42 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -1216,6 +1216,28 @@ GEN_ATOMIC_HELPER_ALL(xchg) #undef 
GEN_ATOMIC_HELPER_ALL #undef GEN_ATOMIC_HELPER +#ifdef CONFIG_ATOMIC128 +#include "qemu/int128.h" + +/* These aren't really "proper" helpers because TCG cannot manage Int128. + However, use the same format as the others, for use by the backends. */ +Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); + +Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); + +#endif /* CONFIG_ATOMIC128 */ #endif /* CONFIG_SOFTMMU */ #endif /* TCG_H */ -- 2.5.5