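Optimize the MVE VNEG and VABS instructions by using TCG vector ops when possible, i.e. when the instruction is not subject to predication (mve_no_predication() returns true); otherwise fall back to the existing out-of-line helper functions.
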
Signed-off-by: Peter Maydell <peter.mayd...@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4...@amsat.org>
Reviewed-by: Richard Henderson <richard.hender...@linaro.org>
---
 target/arm/translate-mve.c | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index 255cb860fec..d30c7e57ea3 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -510,7 +510,8 @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
     return true;
 }
 
-static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
+static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
+                       GVecGen2Fn vecfn)
 {
     TCGv_ptr qd, qm;
 
@@ -524,16 +525,25 @@ static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
         return true;
     }
 
-    qd = mve_qreg_ptr(a->qd);
-    qm = mve_qreg_ptr(a->qm);
-    fn(cpu_env, qd, qm);
-    tcg_temp_free_ptr(qd);
-    tcg_temp_free_ptr(qm);
+    if (vecfn && mve_no_predication(s)) {
+        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
+    } else {
+        qd = mve_qreg_ptr(a->qd);
+        qm = mve_qreg_ptr(a->qm);
+        fn(cpu_env, qd, qm);
+        tcg_temp_free_ptr(qd);
+        tcg_temp_free_ptr(qm);
+    }
     mve_update_eci(s);
     return true;
 }
 
-#define DO_1OP(INSN, FN)                                        \
+static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
+{
+    return do_1op_vec(s, a, fn, NULL);
+}
+
+#define DO_1OP_VEC(INSN, FN, VECFN)                             \
     static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
     {                                                           \
         static MVEGenOneOpFn * const fns[] = {                  \
@@ -542,13 +552,15 @@ static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
             gen_helper_mve_##FN##w,                             \
             NULL,                                               \
         };                                                      \
-        return do_1op(s, a, fns[a->size]);                      \
+        return do_1op_vec(s, a, fns[a->size], VECFN);           \
     }
 
+#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)
+
 DO_1OP(VCLZ, vclz)
 DO_1OP(VCLS, vcls)
-DO_1OP(VABS, vabs)
-DO_1OP(VNEG, vneg)
+DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
+DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
 DO_1OP(VQABS, vqabs)
 DO_1OP(VQNEG, vqneg)
 DO_1OP(VMAXA, vmaxa)
-- 
2.20.1