Instead of cmp+and or cmp+andc, use cmpsel. This will be better for hosts that use predicate registers for cmp.
Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/arm/tcg/gengvec.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c index 726a1383ae..3edbf3a262 100644 --- a/target/arm/tcg/gengvec.c +++ b/target/arm/tcg/gengvec.c @@ -1041,7 +1041,7 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst, TCGv_vec rval = tcg_temp_new_vec_matching(dst); TCGv_vec lsh = tcg_temp_new_vec_matching(dst); TCGv_vec rsh = tcg_temp_new_vec_matching(dst); - TCGv_vec max; + TCGv_vec max, zero; tcg_gen_neg_vec(vece, rsh, shift); if (vece == MO_8) { @@ -1061,23 +1061,20 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst, tcg_gen_shrv_vec(vece, rval, src, rsh); /* - * The choice of LT (signed) and GEU (unsigned) are biased toward + * The choice of GE (signed) and GEU (unsigned) are biased toward * the instructions of the x86_64 host. For MO_8, the whole byte * is significant so we must use an unsigned compare; otherwise we * have already masked to a byte and so a signed compare works. * Other tcg hosts have a full set of comparisons and do not care. */ + zero = tcg_constant_vec_matching(dst, vece, 0); max = tcg_constant_vec_matching(dst, vece, 8 << vece); if (vece == MO_8) { - tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); - tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); - tcg_gen_andc_vec(vece, lval, lval, lsh); - tcg_gen_andc_vec(vece, rval, rval, rsh); + tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, lval, lsh, max, zero, lval); + tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, rval, rsh, max, zero, rval); } else { - tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); - tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); - tcg_gen_and_vec(vece, lval, lval, lsh); - tcg_gen_and_vec(vece, rval, rval, rsh); + tcg_gen_cmpsel_vec(TCG_COND_GE, vece, lval, lsh, max, zero, lval); + tcg_gen_cmpsel_vec(TCG_COND_GE, vece, rval, rsh, max, zero, rval); } tcg_gen_or_vec(vece, dst, lval, rval); } @@ -1087,7 +1084,7 @@ void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, { static const TCGOpcode vecop_list[] = { INDEX_op_neg_vec, INDEX_op_shlv_vec, - INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 + INDEX_op_shrv_vec, INDEX_op_cmpsel_vec, 0 }; static const GVecGen3 ops[4] = { { .fniv = gen_ushl_vec, -- 2.43.0