Convert the VQSHL, VRSHL and VQRSHL insns in the 3-reg-same group to decodetree. We have already implemented the size==0b11 (64-bit element) case of these insns; this commit handles the remaining 8-, 16- and 32-bit element sizes.
TODO: find out from rth why decodetree insists on VSHL going into the group... Signed-off-by: Peter Maydell <peter.mayd...@linaro.org> --- target/arm/translate-neon.inc.c | 93 +++++++++++++++++++++++++++++++++ target/arm/translate.c | 23 ++------ target/arm/neon-dp.decode | 30 ++++++++--- 3 files changed, 120 insertions(+), 26 deletions(-) diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c index bdd5f33214e..084c78eea58 100644 --- a/target/arm/translate-neon.inc.c +++ b/target/arm/translate-neon.inc.c @@ -1035,3 +1035,96 @@ DO_3SAME_32(VHADD, hadd) DO_3SAME_32(VHSUB, hsub) DO_3SAME_32(VRHADD, rhadd) DO_3SAME_32(VABD, abd) + +static bool do_3same_qs32(DisasContext *s, arg_3same *a, NeonGenTwoOpEnvFn *fn) +{ + /* + * Saturating shift operations handled elementwise 32 bits at a + * time which need to pass cpu_env to the helper and where the rn + * and rm operands are reversed from the usual do_3same() order. + */ + TCGv_i32 tmp, tmp2; + int pass; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vn | a->vm) & 0x10)) { + return false; + } + + if ((a->vn | a->vm | a->vd) & a->q) { + return false; + } + + if (a->size == 3) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + for (pass = 0; pass < (a->q ? 4 : 2); pass++) { + /* Note reversal of operand order */ + tmp = neon_load_reg(a->vm, pass); + tmp2 = neon_load_reg(a->vn, pass); + fn(tmp, cpu_env, tmp, tmp2); + tcg_temp_free_i32(tmp2); + neon_store_reg(a->vd, pass, tmp); + } + return true; +} + +/* + * Handling for shifts with sizes 8/16/32 bits. 64-bit shifts are + * covered by the *_S64_3s and *_U64_3s patterns and the grouping in + * the decode file means those functions are called first for + * size==0b11. 
Note that we must 'return false' here for the + * size==0b11 case rather than asserting, because where the 64-bit + * function has an UNDEF case and returns false the decoder will fall + * through to trying these functions. + */ +#define DO_3SAME_QS32(INSN, func) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + static NeonGenTwoOpEnvFn * const fns[] = { \ + gen_helper_neon_##func##8, \ + gen_helper_neon_##func##16, \ + gen_helper_neon_##func##32, \ + }; \ + if (a->size > 2) { \ + return false; \ + } \ + return do_3same_qs32(s, a, fns[a->size]); \ + } + +DO_3SAME_QS32(VQSHL_S,qshl_s) +DO_3SAME_QS32(VQSHL_U,qshl_u) +DO_3SAME_QS32(VQRSHL_S,qrshl_s) +DO_3SAME_QS32(VQRSHL_U,qrshl_u) + +#define DO_3SAME_SHIFT32(INSN, func) \ + static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ + { \ + static NeonGenTwoOpFn * const fns[] = { \ + gen_helper_neon_##func##8, \ + gen_helper_neon_##func##16, \ + gen_helper_neon_##func##32, \ + }; \ + int rtmp; \ + if (a->size > 2) { \ + return false; \ + } \ + /* Shift operand order is reversed */ \ + rtmp = a->vn; \ + a->vn = a->vm; \ + a->vm = rtmp; \ + return do_3same_32(s, a, fns[a->size]); \ + } + +DO_3SAME_SHIFT32(VRSHL_S, rshl_s) +DO_3SAME_SHIFT32(VRSHL_U, rshl_u) diff --git a/target/arm/translate.c b/target/arm/translate.c index 29301061ca5..4406fe54647 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -4790,6 +4790,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VRHADD: case NEON_3R_VHSUB: case NEON_3R_VABD: + case NEON_3R_VQSHL: + case NEON_3R_VRSHL: + case NEON_3R_VQRSHL: /* Already handled by decodetree */ return 1; } @@ -4800,17 +4803,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } pairwise = 0; switch (op) { - case NEON_3R_VQSHL: - case NEON_3R_VRSHL: - case NEON_3R_VQRSHL: - { - int rtmp; - /* Shift instruction operands are reversed. 
*/ - rtmp = rn; - rn = rm; - rm = rtmp; - } - break; case NEON_3R_VPADD_VQRDMLAH: case NEON_3R_VPMAX: case NEON_3R_VPMIN: @@ -4870,15 +4862,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = neon_load_reg(rm, pass); } switch (op) { - case NEON_3R_VQSHL: - GEN_NEON_INTEGER_OP_ENV(qshl); - break; - case NEON_3R_VRSHL: - GEN_NEON_INTEGER_OP(rshl); - break; - case NEON_3R_VQRSHL: - GEN_NEON_INTEGER_OP_ENV(qrshl); - break; case NEON_3R_VABA: GEN_NEON_INTEGER_OP(abd); tcg_temp_free_i32(tmp2); diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode index 4b15e52221b..ae442071ef1 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/neon-dp.decode @@ -80,12 +80,30 @@ VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same @3same_64 .... ... . . . 11 .... .... .... . q:1 . . .... \ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=3 -VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64 -VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64 -VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64 -VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64 -VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64 -VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64 +{ + VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64 + VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same +} +{ + VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64 + VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same +} +{ + VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64 + VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same +} +{ + VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64 + VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same +} +{ + VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64 + VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 
1 .... @3same +} +{ + VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64 + VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same +} VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same -- 2.20.1