When st:ld is not required by the guest but ld:st is, we can put ld:ld+ld:st barriers after loads, and then st:st barriers before stores to enforce all required barriers.
The st:st barrier is often special cased by hosts, and that is expected to be more efficient than a full barrier. Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- Redha, I expect this to produce exactly the same barriers as you did with your 'fix guest memory ordering enforcement' patch. While this compiles, it does not fix the failures that I see occasionally with our private gitlab runner. The standalone version of this failure is export QTEST_QEMU_BINARY=./qemu-system-i386 for i in `seq 1 100`; do ./tests/qtest/ahci-test > /dev/null & done wait About 10 to 15% of the runs will fail with ERROR:../src/tests/qtest/ahci-test.c:92:verify_state: assertion failed (ahci_fingerprint == ahci->fingerprint): (0xe0000000 == 0x29228086) Note that this test never seems to fail unless the system is under load, thus starting 100 tests on my 80 core neoverse-n1 system. r~ --- tcg/tcg-op.c | 55 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 5d48537927..4c568a2592 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2834,9 +2834,6 @@ static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr, static void tcg_gen_req_mo(TCGBar type) { -#ifdef TCG_GUEST_DEFAULT_MO - type &= TCG_GUEST_DEFAULT_MO; -#endif type &= ~TCG_TARGET_DEFAULT_MO; if (type) { tcg_gen_mb(type | TCG_BAR_SC); @@ -2868,12 +2865,49 @@ static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi, #endif } +typedef enum { + BAR_LD_BEFORE, + BAR_LD_AFTER, + BAR_ST_BEFORE, +} ChooseBarrier; + +static TCGBar choose_barrier(ChooseBarrier which) +{ +#ifdef TCG_GUEST_DEFAULT_MO + const TCGBar guest_mo = TCG_GUEST_DEFAULT_MO; +#else + const TCGBar guest_mo = TCG_MO_ALL; +#endif + TCGBar ret[3]; + + if (guest_mo == 0) { + return 0; + } + /* + * Special case for i386 and s390x. Because store-load is not + * required by the guest, we can split the barriers such that we + * wind up with a store-store barrier, which is expected to be + * quicker on some hosts. + */ + if (guest_mo == (TCG_MO_ALL & ~TCG_MO_ST_LD)) { + ret[BAR_LD_BEFORE] = 0; + ret[BAR_LD_AFTER] = TCG_MO_LD_LD | TCG_MO_LD_ST; + ret[BAR_ST_BEFORE] = TCG_MO_ST_ST; + } else { + ret[BAR_LD_BEFORE] = (TCG_MO_LD_LD | TCG_MO_ST_LD) & guest_mo; + ret[BAR_ST_BEFORE] = (TCG_MO_LD_ST | TCG_MO_ST_ST) & guest_mo; + ret[BAR_LD_AFTER] = 0; + } + return ret[which]; +} + void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) { MemOp orig_memop; MemOpIdx oi; - tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + tcg_gen_req_mo(choose_barrier(BAR_LD_BEFORE)); + memop = tcg_canonicalize_memop(memop, 0, 0); oi = make_memop_idx(memop, idx); @@ -2904,6 +2938,8 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) g_assert_not_reached(); } } + + tcg_gen_req_mo(choose_barrier(BAR_LD_AFTER)); } void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) @@ -2911,7 +2947,8 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop) TCGv_i32 swap = NULL; MemOpIdx oi; - tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + tcg_gen_req_mo(choose_barrier(BAR_ST_BEFORE)); + memop = tcg_canonicalize_memop(memop, 0, 1); oi = make_memop_idx(memop, idx); @@ -2959,7 +2996,8 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) return; } - tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD); + tcg_gen_req_mo(choose_barrier(BAR_LD_BEFORE)); + memop = tcg_canonicalize_memop(memop, 1, 0); oi = make_memop_idx(memop, idx); @@ -2994,6 +3032,8 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) g_assert_not_reached(); } } + + tcg_gen_req_mo(choose_barrier(BAR_LD_AFTER)); } void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) @@ -3006,7 +3046,8 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop) return; } - tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST); + tcg_gen_req_mo(choose_barrier(BAR_ST_BEFORE)); + memop = tcg_canonicalize_memop(memop, 1, 1); oi = make_memop_idx(memop, idx); -- 2.25.1