Avoid the three-operand CC_OP_ADD and CC_OP_ADC in these relatively common cases.
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- target/i386/tcg/translate.c | 20 +++++++++++ target/i386/tcg/emit.c.inc | 71 +++++++++++++++++++++++++++++++++---- 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a8935f487aa..aee33428989 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1183,6 +1183,26 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) return cc; } +static void gen_neg_setcc(DisasContext *s, int b, TCGv reg) +{ + CCPrepare cc = gen_prepare_cc(s, b, reg); + + if (cc.no_setcond) { + if (cc.cond == TCG_COND_EQ) { + tcg_gen_addi_tl(reg, cc.reg, -1); + } else { + tcg_gen_neg_tl(reg, cc.reg); + } + return; + } + + if (cc.use_reg2) { + tcg_gen_negsetcond_tl(cc.cond, reg, cc.reg, cc.reg2); + } else { + tcg_gen_negsetcondi_tl(cc.cond, reg, cc.reg, cc.imm); + } +} + static void gen_setcc(DisasContext *s, int b, TCGv reg) { CCPrepare cc = gen_prepare_cc(s, b, reg); diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 0fa1664a24f..f477a2da992 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1170,11 +1170,28 @@ static void gen_AAS(DisasContext *s, X86DecodedInsn *decode) assume_cc_op(s, CC_OP_EFLAGS); } +static void gen_ADD(DisasContext *s, X86DecodedInsn *decode); static void gen_ADC(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[1].ot; - TCGv c_in = tcg_temp_new(); + TCGv c_in; + /* + * Try to avoid CC_OP_ADC by transforming as follows: + * CC_ADC: src1 = dst + c_in, src2 = 0, src3 = c_in + * CC_ADD: src1 = dst + c_in, src2 = c_in (no src3) + * + * In general src2 vs. src3 matters when computing AF and OF, but not here: + * - AF is bit 4 of dst^src1^src2, which is bit 4 of dst^src1 in both cases + * - OF is a function of the two MSBs, and in both cases they are zero for src2 + */ + if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) { + gen_compute_eflags_c(s, s->T1); + gen_ADD(s, decode); + return; + } + + c_in = tcg_temp_new(); gen_compute_eflags_c(s, c_in); if (s->prefix & PREFIX_LOCK) { tcg_gen_add_tl(s->T0, c_in, s->T1); @@ -3830,22 +3847,64 @@ static void gen_SARX(DisasContext *s, X86DecodedInsn *decode) tcg_gen_sar_tl(s->T0, s->T0, s->T1); } +static void gen_SUB(DisasContext *s, X86DecodedInsn *decode); static void gen_SBB(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; - TCGv c_in = tcg_temp_new(); + TCGv c_in; + /* + * Try to avoid CC_OP_SBB by transforming as follows: + * CC_SBB: src1 = dst + c_in, src2 = 0, src3 = c_in + * CC_SUB: src1 = dst + c_in, src2 = c_in (no src3) + * + * In general src2 vs. src3 matters when computing AF and OF, but not here: + * - AF is bit 4 of dst^src1^src2, which is bit 4 of dst^src1 in both cases + * - OF is a function of the two MSBs, and in both cases they are zero for src2 + */ + if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) { + gen_compute_eflags_c(s, s->T1); + gen_SUB(s, decode); + return; + } + + c_in = tcg_temp_new(); gen_compute_eflags_c(s, c_in); + + /* + * Here the change is as follows: + * CC_SBB: src1 = T0, src2 = T0, src3 = c_in + * CC_SUB: src1 = 0, src2 = c_in (no src3) + * + * The difference also does not matter: + * - AF is bit 4 of dst^src1^src2, but bit 4 of src1^src2 is zero in both cases + * therefore AF comes straight from dst (in fact it is c_in) + * - for OF, src1 and src2 have the same sign in both cases, meaning there + * can be no overflow + */ + if (decode->e.op2 != X86_TYPE_I && !decode->op[0].has_ea && decode->op[0].n == decode->op[2].n) { + if (s->cc_op == CC_OP_DYNAMIC) { + tcg_gen_neg_tl(s->T0, c_in); + } else { + /* + * Do not negate c_in because it will often be dead and only the + * instruction generated by negsetcond will survive. + */ + gen_neg_setcc(s, JCC_B << 1, s->T0); + } + tcg_gen_movi_tl(s->cc_srcT, 0); + decode->cc_src = c_in; + decode->cc_dst = s->T0; + decode->cc_op = CC_OP_SUBB + ot; + return; + } + if (s->prefix & PREFIX_LOCK) { tcg_gen_add_tl(s->T0, s->T1, c_in); tcg_gen_neg_tl(s->T0, s->T0); tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0, s->mem_index, ot | MO_LE); } else { - /* - * TODO: SBB reg, reg could use gen_prepare_eflags_c followed by - * negsetcond, and CC_OP_SUBB as the cc_op. - */ tcg_gen_sub_tl(s->T0, s->T0, s->T1); tcg_gen_sub_tl(s->T0, s->T0, c_in); } -- 2.49.0