Convert the 64-bit element insns in the 3-reg-same group to decodetree. This covers VQSHL, VRSHL and VQRSHL where size==0b11.
Signed-off-by: Peter Maydell <peter.mayd...@linaro.org> --- target/arm/translate-neon.inc.c | 62 +++++++++++++++++++++++++++++++++ target/arm/translate.c | 38 ++------------------ target/arm/neon-dp.decode | 11 ++++++ 3 files changed, 75 insertions(+), 36 deletions(-) diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index 161313ad879..bc5afb368e3 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -908,3 +908,65 @@ static bool trans_SHA256SU1_3s(DisasContext *s, arg_SHA256SU1_3s *a) return true; } + +static bool do_3same_64(DisasContext *s, arg_3same *a, NeonGenTwo64OpFn *fn) +{ + /* Handle 3-reg-same operations to be performed 64 bits at a time */ + TCGv_i64 rn, rm, rd; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + rn = tcg_temp_new_i64(); + rm = tcg_temp_new_i64(); + rd = tcg_temp_new_i64(); + + for (pass = 0; pass < (a->q ? 2 : 1); pass++) { + neon_load_reg64(rn, a->vn + pass); + neon_load_reg64(rm, a->vm + pass); + fn(rd, rm, rn); + neon_store_reg64(rd, a->vd + pass); + } + + tcg_temp_free_i64(rn); + tcg_temp_free_i64(rm); + tcg_temp_free_i64(rd); + + return true; +} + +#define DO_3SAME_64(INSN, FUNC) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + return do_3same_64(s, a, FUNC); \ + } + +#define DO_3SAME_64_ENV(INSN, FUNC) \ + static void gen_##INSN##_3s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ + { \ + FUNC(d, cpu_env, n, m); \ + } \ + DO_3SAME_64(INSN, gen_##INSN##_3s) + +DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64) +DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64) +DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64) +DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64) +DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64) +DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) diff --git a/target/arm/translate.c b/target/arm/translate.c index 160638e2a7c..fb64eb3a800 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -4791,42 +4791,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } if (size == 3) { - /* 64-bit element instructions. */ - for (pass = 0; pass < (q ? 2 : 1); pass++) { - neon_load_reg64(cpu_V0, rn + pass); - neon_load_reg64(cpu_V1, rm + pass); - switch (op) { - case NEON_3R_VQSHL: - if (u) { - gen_helper_neon_qshl_u64(cpu_V0, cpu_env, - cpu_V1, cpu_V0); - } else { - gen_helper_neon_qshl_s64(cpu_V0, cpu_env, - cpu_V1, cpu_V0); - } - break; - case NEON_3R_VRSHL: - if (u) { - gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0); - } else { - gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0); - } - break; - case NEON_3R_VQRSHL: - if (u) { - gen_helper_neon_qrshl_u64(cpu_V0, cpu_env, - cpu_V1, cpu_V0); - } else { - gen_helper_neon_qrshl_s64(cpu_V0, cpu_env, - cpu_V1, cpu_V0); - } - break; - default: - abort(); - } - neon_store_reg64(cpu_V0, rd + pass); - } - return 0; + /* 64-bit element instructions: handled by decodetree */ + return 1; } pairwise = 0; switch (op) { diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index f22606b2bd5..a4932e550ed 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -68,6 +68,17 @@ VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same +# Insns operating on 64-bit elements (size!=0b11 handled elsewhere) +@3same_64 .... ... . . . 11 .... .... .... . q:1 . . .... \ + &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=3 + +VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64 +VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64 +VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64 +VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64 +VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64 +VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64 + VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same -- 2.20.1