This feature contains the LDCLRP, LDSETP, and SWPP instructions. Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/arm/cpu-features.h | 5 ++++ target/arm/tcg/translate-a64.c | 49 ++++++++++++++++++++++++++++++++++ target/arm/tcg/a64.decode | 7 +++++ 3 files changed, 61 insertions(+)
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h index e3d4c3d382..182b301c86 100644 --- a/target/arm/cpu-features.h +++ b/target/arm/cpu-features.h @@ -411,6 +411,11 @@ static inline bool isar_feature_aa64_lse(const ARMISARegisters *id) return FIELD_EX64_IDREG(id, ID_AA64ISAR0, ATOMIC) != 0; } +static inline bool isar_feature_aa64_lse128(const ARMISARegisters *id) +{ + return FIELD_EX64_IDREG(id, ID_AA64ISAR0, ATOMIC) >= 3; +} + static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) { return FIELD_EX64_IDREG(id, ID_AA64ISAR0, RDM) != 0; diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index d0639e29cf..976bf4df32 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -3753,6 +3753,55 @@ TRANS_FEAT(LDUMAX, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, TRANS_FEAT(LDUMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) TRANS_FEAT(SWP, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) +typedef void Atomic128ThreeOpFn(TCGv_i128, TCGv_i64, TCGv_i128, TCGArg, MemOp); + +static bool do_atomic128_ld(DisasContext *s, arg_atomic128 *a, + Atomic128ThreeOpFn *fn, bool invert) +{ + MemOp mop; + int rlo, rhi; + TCGv_i64 clean_addr, tlo, thi; + TCGv_i128 t16; + + if (a->rt == 31 || a->rt2 == 31 || a->rt == a->rt2) { + return false; + } + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + mop = check_atomic_align(s, a->rn, MO_128); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, + a->rn != 31, mop); + + rlo = (s->be_data == MO_LE ? a->rt : a->rt2); + rhi = (s->be_data == MO_LE ? a->rt2 : a->rt); + + tlo = read_cpu_reg(s, rlo, true); + thi = read_cpu_reg(s, rhi, true); + if (invert) { + tcg_gen_not_i64(tlo, tlo); + tcg_gen_not_i64(thi, thi); + } + /* + * The tcg atomic primitives are all full barriers. Therefore we + * can ignore the Acquire and Release bits of this instruction. + */ + t16 = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(t16, tlo, thi); + + fn(t16, clean_addr, t16, get_mem_index(s), mop); + + tcg_gen_extr_i128_i64(cpu_reg(s, rlo), cpu_reg(s, rhi), t16); + return true; +} + +TRANS_FEAT(LDCLRP, aa64_lse128, do_atomic128_ld, + a, tcg_gen_atomic_fetch_and_i128, true) +TRANS_FEAT(LDSETP, aa64_lse128, do_atomic128_ld, + a, tcg_gen_atomic_fetch_or_i128, false) +TRANS_FEAT(SWPP, aa64_lse128, do_atomic128_ld, + a, tcg_gen_atomic_xchg_i128, false) + static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) { bool iss_sf = ldst_iss_sf(a->sz, false, false); diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 8c798cde2b..70ed9610af 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -536,6 +536,13 @@ SWP .. 111 0 00 . . 1 ..... 1000 00 ..... ..... @atomic LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 +# Atomic 128-bit memory operations +&atomic128 rn rt rt2 a r +@atomic128 ........ a:1 r:1 . rt2:5 ...... rn:5 rt:5 &atomic128 +LDCLRP 00011001 . . 1 ..... 000100 ..... ..... @atomic128 +LDSETP 00011001 . . 1 ..... 001100 ..... ..... @atomic128 +SWPP 00011001 . . 1 ..... 100000 ..... ..... @atomic128 + # Load/store register (pointer authentication) # LDRA immediate is 10 bits signed and scaled, but the bits aren't all contiguous -- 2.43.0