Avoid the three-operand CC_OP_ADD and CC_OP_ADC in these relatively common cases.
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- target/i386/tcg/translate.c | 20 ++++++++++++ target/i386/tcg/emit.c.inc | 65 ++++++++++++++++++++++++++++++++++--- 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a8935f487aa..aee33428989 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1183,6 +1183,26 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) return cc; } +static void gen_neg_setcc(DisasContext *s, int b, TCGv reg) +{ + CCPrepare cc = gen_prepare_cc(s, b, reg); + + if (cc.no_setcond) { + if (cc.cond == TCG_COND_EQ) { + tcg_gen_addi_tl(reg, cc.reg, -1); + } else { + tcg_gen_neg_tl(reg, cc.reg); + } + return; + } + + if (cc.use_reg2) { + tcg_gen_negsetcond_tl(cc.cond, reg, cc.reg, cc.reg2); + } else { + tcg_gen_negsetcondi_tl(cc.cond, reg, cc.reg, cc.imm); + } +} + static void gen_setcc(DisasContext *s, int b, TCGv reg) { CCPrepare cc = gen_prepare_cc(s, b, reg); diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 0fa1664a24f..76cd7f00308 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1170,11 +1170,28 @@ static void gen_AAS(DisasContext *s, X86DecodedInsn *decode) assume_cc_op(s, CC_OP_EFLAGS); } +static void gen_ADD(DisasContext *s, X86DecodedInsn *decode); static void gen_ADC(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[1].ot; - TCGv c_in = tcg_temp_new(); + TCGv c_in; + /* + * Try to avoid CC_OP_ADC. The definition of ADD and ADC is different + * for AF and OF: CC_OP_ADC would make the second source argument 0 and + * the incoming carry would not be taken into account; whereas with ADD + * the second source argument is the incoming carry (c_in). However it does + * not matter here: + * - for AF, only bit 4 matters and it's zero for both 0 and c_in + * - for OF, only the sign bit matters and it's zero for both 0 and c_in + */ + if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) { + gen_compute_eflags_c(s, s->T1); + gen_ADD(s, decode); + return; + } + + c_in = tcg_temp_new(); gen_compute_eflags_c(s, c_in); if (s->prefix & PREFIX_LOCK) { tcg_gen_add_tl(s->T0, c_in, s->T1); @@ -3830,22 +3847,60 @@ static void gen_SARX(DisasContext *s, X86DecodedInsn *decode) tcg_gen_sar_tl(s->T0, s->T0, s->T1); } +static void gen_SUB(DisasContext *s, X86DecodedInsn *decode); static void gen_SBB(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; TCGv c_in = tcg_temp_new(); + /* + * Try to avoid CC_OP_SBB. The definition of SUB and SBB is different + * for AF and OF: CC_OP_SBB would make the second source argument 0 and + * the incoming carry would not be taken into account; whereas with SUB + * the second source argument is the incoming carry (c_in). However it does + * not matter here: + * - for AF, only bit 4 matters and it's zero for both 0 and c_in + * - for OF, only the sign bit matters and it's zero for both 0 and c_in + */ + if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) { + gen_compute_eflags_c(s, s->T1); + gen_SUB(s, decode); + return; + } + gen_compute_eflags_c(s, c_in); + + /* + * Here, src1 is changed from T0 to 0, and src2 is changed from T1 to c_in + * (and T0 = T1). AF and OF are unaffected because: + * - for AF, only bit 4 of src1^src2 matters, and it's zero for both + * T0^T1 and 0^c_in + * - for OF, the sign bit of both T0^T1 and 0^c_in is zero, so there can + * be no overflow. + */ + if (decode->e.op2 != X86_TYPE_I && !decode->op[0].has_ea && decode->op[0].n == decode->op[2].n) { + if (s->cc_op == CC_OP_DYNAMIC) { + tcg_gen_neg_tl(s->T0, c_in); + } else { + /* + * Do not negate c_in because it will often be dead and only the + * instruction generated by negsetcond will survive. + */ + gen_neg_setcc(s, JCC_B << 1, s->T0); + } + tcg_gen_movi_tl(s->cc_srcT, 0); + decode->cc_src = c_in; + decode->cc_dst = s->T0; + decode->cc_op = CC_OP_SUBB + ot; + return; + } + if (s->prefix & PREFIX_LOCK) { tcg_gen_add_tl(s->T0, s->T1, c_in); tcg_gen_neg_tl(s->T0, s->T0); tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0, s->mem_index, ot | MO_LE); } else { - /* - * TODO: SBB reg, reg could use gen_prepare_eflags_c followed by - * negsetcond, and CC_OP_SUBB as the cc_op. - */ tcg_gen_sub_tl(s->T0, s->T0, s->T1); tcg_gen_sub_tl(s->T0, s->T0, c_in); } -- 2.49.0