On Thu, Dec 17, 2009 at 6:32 PM, Richard Henderson <r...@twiddle.net> wrote: > Implement conditional moves in the x86_64 backend. > > Signed-off-by: Richard Henderson <r...@twiddle.net> > --- > tcg/x86_64/tcg-target.c | 65 ++++++++++++++++++++++++++++++++++++++++++++-- > 1 files changed, 62 insertions(+), 3 deletions(-) > > diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c > index 2339091..e411755 100644 > --- a/tcg/x86_64/tcg-target.c > +++ b/tcg/x86_64/tcg-target.c > @@ -491,9 +491,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int > label_index) > } > } > > -static void tcg_out_brcond(TCGContext *s, int cond, > - TCGArg arg1, TCGArg arg2, int const_arg2, > - int label_index, int rexw) > +static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1, > + TCGArg arg2, int const_arg2, int rexw) > { > if (const_arg2) { > if (arg2 == 0) { > @@ -508,9 +507,45 @@ static void tcg_out_brcond(TCGContext *s, int cond, > } else { > tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1); > } > +} > + > +static void tcg_out_brcond(TCGContext *s, int cond, > + TCGArg arg1, TCGArg arg2, int const_arg2, > + int label_index, int rexw) > +{ > + tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw); > tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index); > } > > +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0, > + TCGArg arg1, TCGArg arg2, int const_arg2, int > rexw)
Perhaps renaming arg0 to dest would make things slightly more readable. > +{ > + int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2)); > + > + if (use_xor) > + tcg_out_movi(s, TCG_TYPE_I32, arg0, 0); > + tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw); > + tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, 0, arg0); A comment saying this is a setcc would be nice. Also note that tcg_out_modrm will generate an unneeded prefix for some registers. cf. the patch I sent to the list months ago. > + if (!use_xor) > + tgen_arithi32(s, ARITH_AND, arg0, 0xff); Wouldn't movzbl be better? Regarding the xor optimization, I tested it on my i7 and it was (very) slightly slower running a 64-bit SPEC2k gcc. > +} > + > +static void tcg_out_movcond(TCGContext *s, int cond, TCGArg arg0, > + TCGArg arg1, TCGArg arg2, int const_arg2, > + TCGArg arg3, TCGArg arg4, int rexw) Perhaps renaming arg0 to dest would make things slightly more readable. You should also add a note stating that arg3 != arg4. > +{ > + if (arg0 == arg3) { > + cond = tcg_invert_cond(cond); > + arg3 = arg4; > + arg4 = arg0; > + } > + > + tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw); > + if (arg0 != arg4) > + tcg_out_mov(s, arg0, arg4); > + tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw, arg0, > arg3); A comment saying this is cmovcc would be nice. Note, I didn't check the correctness, though it looks OK. 
I'll run real tests in the next iteration :-) > +} > + > #if defined(CONFIG_SOFTMMU) > > #include "../../softmmu_defs.h" > @@ -1197,6 +1232,24 @@ static inline void tcg_out_op(TCGContext *s, int opc, > const TCGArg *args, > tcg_out_modrm(s, 0x8b, args[0], args[1]); > break; > > + case INDEX_op_setcond_i32: > + tcg_out_setcond(s, args[3], args[0], args[1], args[2], > + const_args[2], 0); > + break; > + case INDEX_op_setcond_i64: > + tcg_out_setcond(s, args[3], args[0], args[1], args[2], > + const_args[2], P_REXW); > + break; > + > + case INDEX_op_movcond_i32: > + tcg_out_movcond(s, args[5], args[0], args[1], args[2], > + const_args[2], args[3], args[4], 0); > + break; > + case INDEX_op_movcond_i64: > + tcg_out_movcond(s, args[5], args[0], args[1], args[2], > + const_args[2], args[3], args[4], P_REXW); > + break; > + > case INDEX_op_qemu_ld8u: > tcg_out_qemu_ld(s, args, 0); > break; > @@ -1376,6 +1429,12 @@ static const TCGTargetOpDef x86_64_op_defs[] = { > { INDEX_op_ext16u_i64, { "r", "r"} }, > { INDEX_op_ext32u_i64, { "r", "r"} }, > > + { INDEX_op_setcond_i32, { "r", "r", "re" } }, > + { INDEX_op_setcond_i64, { "r", "r", "re" } }, > + > + { INDEX_op_movcond_i32, { "r", "r", "re", "r", "r" } }, > + { INDEX_op_movcond_i64, { "r", "r", "re", "r", "r" } }, For the i32 variants, "ri" instead of "re" is enough. Laurent > + > { INDEX_op_qemu_ld8u, { "r", "L" } }, > { INDEX_op_qemu_ld8s, { "r", "L" } }, > { INDEX_op_qemu_ld16u, { "r", "L" } }, > -- > 1.6.5.2 > >