This fixes a bug: these two insns (STZG and STZ2G) should have been using atomic 16-byte stores. MTE is an ARMv8.5 feature and LSE2 is mandatory from ARMv8.4, so any CPU that implements MTE also implements LSE2 and its 16-byte single-copy atomicity guarantee.
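For reference, a sketch of the guest-visible semantics (illustrative
only, not part of the patch; it assumes p is a tagged, 16-byte-aligned
pointer such as the one set up in the mte-7 test):

    /* STZG tags and zeroes one 16-byte tag granule; STZ2G tags and
     * zeroes two adjacent granules (32 bytes).  With this change each
     * 16-byte zeroing store is emitted as a single i128 access, i.e.
     * as one atomic 16-byte operation. */
    asm("stzg  %0, [%0]" : : "r"(p));   /* zeroes p[0] .. p[15] */
    asm("stz2g %0, [%0]" : : "r"(p));   /* zeroes p[0] .. p[31] */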
Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
 target/arm/translate-a64.c | 18 +++++++++++-------
 tests/tcg/aarch64/mte-7.c  |  3 +--
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index edf92a728f..b42e5848cc 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -4207,16 +4207,20 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
 
     if (is_zero) {
         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
-        TCGv_i64 tcg_zero = tcg_constant_i64(0);
+        TCGv_i64 zero64 = tcg_constant_i64(0);
+        TCGv_i128 zero128 = tcg_temp_new_i128();
         int mem_index = get_mem_index(s);
-        int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;
+        MemOp mop = MO_128 | MO_ALIGN;
 
-        tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
-                            MO_UQ | MO_ALIGN_16);
-        for (i = 8; i < n; i += 8) {
-            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
-            tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ);
+        tcg_gen_concat_i64_i128(zero128, zero64, zero64);
+
+        /* This is 1 or 2 atomic 16-byte operations. */
+        tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
+        if (is_pair) {
+            tcg_gen_addi_i64(clean_addr, clean_addr, 16);
+            tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
         }
+        tcg_temp_free_i128(zero128);
     }
 
     if (index != 0) {
diff --git a/tests/tcg/aarch64/mte-7.c b/tests/tcg/aarch64/mte-7.c
index a981de62d4..04974f9ebb 100644
--- a/tests/tcg/aarch64/mte-7.c
+++ b/tests/tcg/aarch64/mte-7.c
@@ -19,8 +19,7 @@ int main(int ac, char **av)
     p = (void *)((unsigned long)p | (1ul << 56));
 
     /* Store tag in sequential granules. */
-    asm("stg %0, [%0]" : : "r"(p + 0x0ff0));
-    asm("stg %0, [%0]" : : "r"(p + 0x1000));
+    asm("stz2g %0, [%0]" : : "r"(p + 0x0ff0));
 
     /*
      * Perform an unaligned store with tag 1 crossing the pages.
-- 
2.34.1
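P.S. On the test change: the two STGs tagged the granules at p + 0x0ff0
and p + 0x1000 without touching the data; the single STZ2G covers the
same two granules (bytes 0x0ff0..0x100f, crossing the page boundary)
while also zeroing them, and so exercises the new is_pair path above.
A sketch of the equivalence (annotations are mine; the replacement
instruction is the one from the diff):

    /* Before: tag two granules one at a time, data left untouched. */
    asm("stg %0, [%0]" : : "r"(p + 0x0ff0));
    asm("stg %0, [%0]" : : "r"(p + 0x1000));
    /* After: one STZ2G tags and zeroes both granules, with each
     * 16-byte half emitted as an atomic store. */
    asm("stz2g %0, [%0]" : : "r"(p + 0x0ff0));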