Compute both partial results separately and accumulate once at the end, instead of accumulating in the middle of the loop.
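Roughly, the emitted op sequence changes from (a sketch, with op names abbreviated; "lo"/"hi" stand for the two 64-bit halves of the source vector, and and/andc is selected by a->mp):

    rt = 0
    vrb = and(mask, vrb.lo); vrb = ctpop(vrb); rt = rt + vrb
    vrb = and(mask, vrb.hi); vrb = ctpop(vrb); rt = rt + vrb

to

    r0 = and(mask, vrb.lo); r0 = ctpop(r0)
    r1 = and(mask, vrb.hi); r1 = ctpop(r1)
    r0 = r0 + r1

so the two popcounts no longer depend on each other through the accumulator, and the zero-initialized temp from tcg_const_i64 is no longer needed.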
Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
 target/ppc/translate/vmx-impl.c.inc | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 7af6d7217d..ca27c11d87 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -2236,24 +2236,25 @@ static bool trans_MTVSRBMI(DisasContext *ctx, arg_DX_b *a)
 
 static bool do_vcntmb(DisasContext *ctx, arg_VX_mp *a, int vece)
 {
-    TCGv_i64 rt, vrb, mask;
-    rt = tcg_const_i64(0);
-    vrb = tcg_temp_new_i64();
+    TCGv_i64 r[2], mask;
+
+    r[0] = tcg_temp_new_i64();
+    r[1] = tcg_temp_new_i64();
     mask = tcg_constant_i64(dup_const(vece, 1ULL << ((8 << vece) - 1)));
 
     for (int i = 0; i < 2; i++) {
-        get_avr64(vrb, a->vrb, i);
+        get_avr64(r[i], a->vrb, i);
         if (a->mp) {
-            tcg_gen_and_i64(vrb, mask, vrb);
+            tcg_gen_and_i64(r[i], mask, r[i]);
         } else {
-            tcg_gen_andc_i64(vrb, mask, vrb);
+            tcg_gen_andc_i64(r[i], mask, r[i]);
         }
-        tcg_gen_ctpop_i64(vrb, vrb);
-        tcg_gen_add_i64(rt, rt, vrb);
+        tcg_gen_ctpop_i64(r[i], r[i]);
     }
 
-    tcg_gen_shli_i64(rt, rt, TARGET_LONG_BITS - 8 + vece);
-    tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], rt);
+    tcg_gen_add_i64(r[0], r[0], r[1]);
+    tcg_gen_shli_i64(r[0], r[0], TARGET_LONG_BITS - 8 + vece);
+    tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], r[0]);
     return true;
 }
 
-- 
2.34.1