Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
 target/arm/tcg/helper-sve.h    |  32 +++++++++
 target/arm/tcg/sve_helper.c    |   8 +++
 target/arm/tcg/translate-sve.c | 126 ++++++++++++++++++++++++---------
 target/arm/tcg/sve.decode      |  31 ++++++++
 4 files changed, 162 insertions(+), 35 deletions(-)
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h index 1999c4bb1f..ade76ff664 100644 --- a/target/arm/tcg/helper-sve.h +++ b/target/arm/tcg/helper-sve.h @@ -1658,6 +1658,14 @@ DEF_HELPER_FLAGS_4(sve_ld2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld2qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_ld1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1bdu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1722,6 +1730,14 @@ DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld2qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_ld2qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld3qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_ld4qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, 
void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1946,6 +1962,14 @@ DEF_HELPER_FLAGS_4(sve_st2dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st3dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st4dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4qq_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st2qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4qq_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_st1bh_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bs_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bd_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32) @@ -1998,6 +2022,14 @@ DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st2qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4qq_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve_st2qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st3qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) +DEF_HELPER_FLAGS_4(sve_st4qq_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) + DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32) DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, 
void, env, ptr, tl, i32) diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index 4bccc5b49c..65183692bc 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -6744,6 +6744,10 @@ DO_LDN_2(2, dd, MO_64) DO_LDN_2(3, dd, MO_64) DO_LDN_2(4, dd, MO_64) +DO_LDN_2(2, qq, MO_128) +DO_LDN_2(3, qq, MO_128) +DO_LDN_2(4, qq, MO_128) + #undef DO_LDN_1 #undef DO_LDN_2 @@ -7310,6 +7314,10 @@ DO_STN_2(4, dd, MO_64, MO_64) DO_STN_2(1, sq, MO_128, MO_32) DO_STN_2(1, dq, MO_128, MO_64) +DO_STN_2(2, qq, MO_128, MO_128) +DO_STN_2(3, qq, MO_128, MO_128) +DO_STN_2(4, qq, MO_128, MO_128) + #undef DO_STN_1 #undef DO_STN_2 diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 1d045625dc..fe4bf6c57c 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -89,7 +89,7 @@ static inline int expand_imm_sh8u(DisasContext *s, int x) */ static inline int msz_dtype(DisasContext *s, int msz) { - static const uint8_t dtype[4] = { 0, 5, 10, 15 }; + static const uint8_t dtype[5] = { 0, 5, 10, 15, 18 }; return dtype[msz]; } @@ -4775,23 +4775,25 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a) */ /* The memory mode of the dtype. */ -static const MemOp dtype_mop[18] = { +static const MemOp dtype_mop[19] = { MO_UB, MO_UB, MO_UB, MO_UB, MO_SL, MO_UW, MO_UW, MO_UW, MO_SW, MO_SW, MO_UL, MO_UL, MO_SB, MO_SB, MO_SB, MO_UQ, - MO_UL, MO_UQ, + /* Artificial values used by decode */ + MO_UL, MO_UQ, MO_128 }; #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) /* The vector element size of dtype. 
*/ -static const uint8_t dtype_esz[18] = { +static const uint8_t dtype_esz[19] = { 0, 1, 2, 3, 3, 1, 2, 3, 3, 2, 2, 3, 3, 2, 1, 3, - 4, 4, + /* Artificial values used by decode */ + 4, 4, 4, }; uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, @@ -4842,7 +4844,7 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, } /* Indexed by [mte][be][dtype][nreg] */ -static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = { +static gen_helper_gvec_mem * const ldr_fns[2][2][19][4] = { { /* mte inactive, little-endian */ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, @@ -4870,6 +4872,8 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = { { gen_helper_sve_ld1squ_le_r, NULL, NULL, NULL }, { gen_helper_sve_ld1dqu_le_r, NULL, NULL, NULL }, + { NULL, gen_helper_sve_ld2qq_le_r, + gen_helper_sve_ld3qq_le_r, gen_helper_sve_ld4qq_le_r }, }, /* mte inactive, big-endian */ @@ -4899,6 +4903,8 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = { { gen_helper_sve_ld1squ_be_r, NULL, NULL, NULL }, { gen_helper_sve_ld1dqu_be_r, NULL, NULL, NULL }, + { NULL, gen_helper_sve_ld2qq_be_r, + gen_helper_sve_ld3qq_be_r, gen_helper_sve_ld4qq_be_r }, }, }, @@ -4937,6 +4943,10 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = { { gen_helper_sve_ld1squ_le_r_mte, NULL, NULL, NULL }, { gen_helper_sve_ld1dqu_le_r_mte, NULL, NULL, NULL }, + { NULL, + gen_helper_sve_ld2qq_le_r_mte, + gen_helper_sve_ld3qq_le_r_mte, + gen_helper_sve_ld4qq_le_r_mte }, }, /* mte active, big-endian */ @@ -4974,6 +4984,10 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][18][4] = { { gen_helper_sve_ld1squ_be_r_mte, NULL, NULL, NULL }, { gen_helper_sve_ld1dqu_be_r_mte, NULL, NULL, NULL }, + { NULL, + gen_helper_sve_ld2qq_be_r_mte, + gen_helper_sve_ld3qq_be_r_mte, + gen_helper_sve_ld4qq_be_r_mte }, }, }, }; @@ -4998,16 +5012,26 @@ static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) return false; 
} - /* dtypes 16 and 17 are artificial, representing 128-bit element */ - if (a->dtype < 16) { + /* dtypes 16-18 are artificial, representing 128-bit element */ + switch (a->dtype) { + case 0 ... 15: if (!dc_isar_feature(aa64_sve, s)) { return false; } - } else { + break; + case 16: case 17: if (!dc_isar_feature(aa64_sve2p1, s)) { return false; } s->is_nonstreaming = true; + break; + case 18: + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + break; + default: + g_assert_not_reached(); } if (sve_access_check(s)) { @@ -5021,16 +5045,26 @@ static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) { - /* dtypes 16 and 17 are artificial, representing 128-bit element */ - if (a->dtype < 16) { + /* dtypes 16-18 are artificial, representing 128-bit element */ + switch (a->dtype) { + case 0 ... 15: if (!dc_isar_feature(aa64_sve, s)) { return false; } - } else { + break; + case 16: case 17: if (!dc_isar_feature(aa64_sve2p1, s)) { return false; } s->is_nonstreaming = true; + break; + case 18: + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } + break; + default: + g_assert_not_reached(); } if (sve_access_check(s)) { @@ -5542,55 +5576,67 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, gen_helper_sve_st1dd_be_r_mte, gen_helper_sve_st1dq_be_r_mte } } }, }; - static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { + static gen_helper_gvec_mem * const fn_multiple[2][2][3][5] = { { { { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_le_r, gen_helper_sve_st2ss_le_r, - gen_helper_sve_st2dd_le_r }, + gen_helper_sve_st2dd_le_r, + gen_helper_sve_st2qq_le_r }, { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_le_r, gen_helper_sve_st3ss_le_r, - gen_helper_sve_st3dd_le_r }, + gen_helper_sve_st3dd_le_r, + gen_helper_sve_st3qq_le_r }, { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_le_r, gen_helper_sve_st4ss_le_r, - gen_helper_sve_st4dd_le_r } }, + 
gen_helper_sve_st4dd_le_r, + gen_helper_sve_st4qq_le_r } }, { { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_be_r, gen_helper_sve_st2ss_be_r, - gen_helper_sve_st2dd_be_r }, + gen_helper_sve_st2dd_be_r, + gen_helper_sve_st2qq_be_r }, { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_be_r, gen_helper_sve_st3ss_be_r, - gen_helper_sve_st3dd_be_r }, + gen_helper_sve_st3dd_be_r, + gen_helper_sve_st3qq_be_r }, { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_be_r, gen_helper_sve_st4ss_be_r, - gen_helper_sve_st4dd_be_r } } }, + gen_helper_sve_st4dd_be_r, + gen_helper_sve_st4qq_be_r } } }, { { { gen_helper_sve_st2bb_r_mte, gen_helper_sve_st2hh_le_r_mte, gen_helper_sve_st2ss_le_r_mte, - gen_helper_sve_st2dd_le_r_mte }, + gen_helper_sve_st2dd_le_r_mte, + gen_helper_sve_st2qq_le_r_mte }, { gen_helper_sve_st3bb_r_mte, gen_helper_sve_st3hh_le_r_mte, gen_helper_sve_st3ss_le_r_mte, - gen_helper_sve_st3dd_le_r_mte }, + gen_helper_sve_st3dd_le_r_mte, + gen_helper_sve_st3qq_le_r_mte }, { gen_helper_sve_st4bb_r_mte, gen_helper_sve_st4hh_le_r_mte, gen_helper_sve_st4ss_le_r_mte, - gen_helper_sve_st4dd_le_r_mte } }, + gen_helper_sve_st4dd_le_r_mte, + gen_helper_sve_st4qq_le_r_mte } }, { { gen_helper_sve_st2bb_r_mte, gen_helper_sve_st2hh_be_r_mte, gen_helper_sve_st2ss_be_r_mte, - gen_helper_sve_st2dd_be_r_mte }, + gen_helper_sve_st2dd_be_r_mte, + gen_helper_sve_st2qq_be_r_mte }, { gen_helper_sve_st3bb_r_mte, gen_helper_sve_st3hh_be_r_mte, gen_helper_sve_st3ss_be_r_mte, - gen_helper_sve_st3dd_be_r_mte }, + gen_helper_sve_st3dd_be_r_mte, + gen_helper_sve_st3qq_be_r_mte }, { gen_helper_sve_st4bb_r_mte, gen_helper_sve_st4hh_be_r_mte, gen_helper_sve_st4ss_be_r_mte, - gen_helper_sve_st4dd_be_r_mte } } }, + gen_helper_sve_st4dd_be_r_mte, + gen_helper_sve_st4qq_be_r_mte } } }, }; gen_helper_gvec_mem *fn; int be = s->be_data == MO_BE; @@ -5619,12 +5665,17 @@ static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) } break; case MO_128: - assert(a->msz < a->esz); - assert(a->nreg == 0); - if 
(!dc_isar_feature(aa64_sve2p1, s)) { - return false; + if (a->nreg == 0) { + assert(a->msz < a->esz); + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + } else { + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } } - s->is_nonstreaming = true; break; default: g_assert_not_reached(); @@ -5651,12 +5702,17 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) } break; case MO_128: - assert(a->msz < a->esz); - assert(a->nreg == 0); - if (!dc_isar_feature(aa64_sve2p1, s)) { - return false; + if (a->nreg == 0) { + assert(a->msz < a->esz); + if (!dc_isar_feature(aa64_sve2p1, s)) { + return false; + } + s->is_nonstreaming = true; + } else { + if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) { + return false; + } } - s->is_nonstreaming = true; break; default: g_assert_not_reached(); diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode index a8e9b8afec..f481f1479d 100644 --- a/target/arm/tcg/sve.decode +++ b/target/arm/tcg/sve.decode @@ -229,6 +229,9 @@ @rprr_load_dt ....... dtype:4 rm:5 ... pg:3 rn:5 rd:5 &rprr_load @rpri_load_dt ....... dtype:4 . imm:s4 ... pg:3 rn:5 rd:5 &rpri_load +@rprr_load ....... .... rm:5 ... pg:3 rn:5 rd:5 &rprr_load +@rpri_load ....... .... . imm:s4 ... pg:3 rn:5 rd:5 &rpri_load + @rprr_load_msz ....... .... rm:5 ... pg:3 rn:5 rd:5 \ &rprr_load dtype=%msz_dtype @rpri_load_msz ....... .... . imm:s4 ... pg:3 rn:5 rd:5 \ @@ -1267,12 +1270,26 @@ LDNF1_zpri 1010010 .... 1.... 101 ... ..... ..... @rpri_load_dt nreg=0 # SVE load multiple structures (scalar plus scalar) # LD2B, LD2H, LD2W, LD2D; etc. LD_zprr 1010010 .. nreg:2 ..... 110 ... ..... ..... @rprr_load_msz +# LD[234]Q +LD_zprr 1010010 01 01 ..... 100 ... ..... ..... \ + @rprr_load dtype=18 nreg=1 +LD_zprr 1010010 10 01 ..... 100 ... ..... ..... \ + @rprr_load dtype=18 nreg=2 +LD_zprr 1010010 11 01 ..... 100 ... ..... ..... 
\ + @rprr_load dtype=18 nreg=3 # SVE contiguous non-temporal load (scalar plus immediate) # LDNT1B, LDNT1H, LDNT1W, LDNT1D # SVE load multiple structures (scalar plus immediate) # LD2B, LD2H, LD2W, LD2D; etc. LD_zpri 1010010 .. nreg:2 0.... 111 ... ..... ..... @rpri_load_msz +# LD[234]Q +LD_zpri 1010010 01 001 .... 111 ... ..... ..... \ + @rpri_load dtype=18 nreg=1 +LD_zpri 1010010 10 001 .... 111 ... ..... ..... \ + @rpri_load dtype=18 nreg=2 +LD_zpri 1010010 11 001 .... 111 ... ..... ..... \ + @rpri_load dtype=18 nreg=3 # SVE load and broadcast quadword (scalar plus scalar) LD1RQ_zprr 1010010 .. 00 ..... 000 ... ..... ..... \ @@ -1383,11 +1400,25 @@ ST_zprr 1110010 11 10 ..... 010 ... ..... ..... \ # SVE store multiple structures (scalar plus immediate) (nreg != 0) ST_zpri 1110010 .. nreg:2 1.... 111 ... ..... ..... \ @rpri_store msz=%size_23 esz=%size_23 +# ST[234]Q +ST_zpri 11100100 01 00 .... 000 ... ..... ..... \ + @rpri_store msz=4 esz=4 nreg=1 +ST_zpri 11100100 10 00 .... 000 ... ..... ..... \ + @rpri_store msz=4 esz=4 nreg=2 +ST_zpri 11100100 11 00 .... 000 ... ..... ..... \ + @rpri_store msz=4 esz=4 nreg=3 # SVE contiguous non-temporal store (scalar plus scalar) (nreg == 0) # SVE store multiple structures (scalar plus scalar) (nreg != 0) ST_zprr 1110010 .. nreg:2 ..... 011 ... ..... ..... \ @rprr_store msz=%size_23 esz=%size_23 +# ST[234]Q +ST_zprr 11100100 01 1 ..... 000 ... ..... ..... \ + @rprr_store msz=4 esz=4 nreg=1 +ST_zprr 11100100 10 1 ..... 000 ... ..... ..... \ + @rprr_store msz=4 esz=4 nreg=2 +ST_zprr 11100100 11 1 ..... 000 ... ..... ..... \ + @rprr_store msz=4 esz=4 nreg=3 # SVE 32-bit scatter store (scalar plus 32-bit scaled offsets) # Require msz > 0 && msz <= esz. -- 2.43.0