Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/arm/tcg/translate-a64.c | 84 ++++++++++++++++------------------ target/arm/tcg/a64.decode | 3 ++ 2 files changed, 43 insertions(+), 44 deletions(-)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 627d4311bb..2a9cb3fbe0 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -6972,6 +6972,45 @@ TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); +static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) +{ + TCGv_i64 tcg_rn, tcg_rd; + int esz = a->esz; + int esize; + + if (esz < 0 || esz >= MO_64) { + return false; + } + if (!fp_access_check(s)) { + return true; + } + + /* + * For the LL variants the store is larger than the load, + * so if rd == rn we would overwrite parts of our input. + * So load everything right now and use shifts in the main loop. + */ + tcg_rd = tcg_temp_new_i64(); + tcg_rn = tcg_temp_new_i64(); + read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); + + esize = 8 << esz; + for (int i = 0, elements = 8 >> esz; i < elements; i++) { + if (is_u) { + tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); + } else { + tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); + } + tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); + write_vec_element(s, tcg_rd, a->rd, i, esz + 1); + } + clear_vec_high(s, true, a->rd); + return true; +} + +TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) +TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) + /* Shift a TCGv src by TCGv shift_amount, put result in dst. * Note that it is the caller's responsibility to ensure that the * shift amount is in range (ie 0..31 or 0..63) and provide the ARM @@ -10436,47 +10475,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } } -/* USHLL/SHLL - Vector shift left with widening */ -static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, - int immh, int immb, int opcode, int rn, int rd) -{ - int size = 32 - clz32(immh) - 1; - int immhb = immh << 3 | immb; - int shift = immhb - (8 << size); - int dsize = 64; - int esize = 8 << size; - int elements = dsize/esize; - TCGv_i64 tcg_rn = tcg_temp_new_i64(); - TCGv_i64 tcg_rd = tcg_temp_new_i64(); - int i; - - if (size >= 3) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - /* For the LL variants the store is larger than the load, - * so if rd == rn we would overwrite parts of our input. - * So load everything right now and use shifts in the main loop. - */ - read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); - - for (i = 0; i < elements; i++) { - if (is_u) { - tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); - } else { - tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); - } - tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); - write_vec_element(s, tcg_rd, rd, i, size + 1); - } - clear_vec_high(s, true, rd); -} - /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ static void handle_vec_simd_shrn(DisasContext *s, bool is_q, int immh, int immb, int opcode, int rn, int rd) @@ -10566,9 +10564,6 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, opcode, rn, rd); break; - case 0x14: /* SSHLL / USHLL */ - handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); - break; case 0x1c: /* SCVTF / UCVTF */ handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, opcode, rn, rd); @@ -10593,6 +10588,7 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) case 0x06: /* SRSRA / URSRA (accum + rounding) */ case 0x08: /* SRI */ case 0x0a: /* SHL / SLI */ + case 0x14: /* SSHLL / USHLL */ unallocated_encoding(s); return; } diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 6aa8a18240..d13d680589 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -1218,5 +1218,8 @@ FMOVI_v_h 0 q:1 00 1111 00000 ... 1111 11 ..... rd:5 %abcdefgh SHL_v 0.00 11110 .... ... 01010 1 ..... ..... @qlshifti SLI_v 0.10 11110 .... ... 01010 1 ..... ..... @qlshifti + + SSHLL_v 0.00 11110 .... ... 10100 1 ..... ..... @qlshifti + USHLL_v 0.10 11110 .... ... 10100 1 ..... ..... @qlshifti ] } -- 2.43.0