On 12/18/2009 03:37 AM, Laurent Desnogues wrote:
tcg: Generic support for conditional set and conditional move.
Needs cosmetics changes.
Fixed, attachment 1.
tcg-x86_64: Implement setcond and movcond.
Some cosmetics and comments, but overall good.
Fixed, attachment 2.
tcg-i386: Implement small forward branches.
I think this contains a bug.
Fixed, attachment 3. I've added an abort to patch_reloc to verify that
the relocation is in range. I've propagated the "small" flag to all of
the branch functions so that...
tcg-i386: Simplify brcond2.
I don't like the rewrite of brcond2.
... this patch is dropped.
tcg-i386: Implement setcond, movcond, setcond2.
Not yet reviewed.
Fixed, attachment 4. Similar changes to the amd64 patch.
r~
commit b89e4fc848abd936554ce48b7f9cf23d27ed9eb5
Author: Richard Henderson <r...@twiddle.net>
Date: Fri Dec 18 11:02:06 2009 -0800
tcg: Generic support for conditional set and conditional move.
Defines setcond and movcond for implementing conditional moves at
the tcg opcode level. 64-bit-on-32-bit is expanded via a setcond2
primitive plus other operations.
Signed-off-by: Richard Henderson <r...@twiddle.net>
diff --git a/tcg/README b/tcg/README
index e672258..6163abe 100644
--- a/tcg/README
+++ b/tcg/README
@@ -152,6 +152,11 @@ Conditional jump if t0 cond t1 is true. cond can be:
TCG_COND_LEU /* unsigned */
TCG_COND_GTU /* unsigned */
+* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+
+Similar to brcond, except that the 64-bit values T0 and T1
+are formed from two 32-bit arguments.
+
********* Arithmetic
* add_i32/i64 t0, t1, t2
@@ -282,6 +287,25 @@ order bytes must be set to zero.
Indicate that the value of t0 won't be used later. It is useful to
force dead code elimination.
+********* Conditional moves
+
+* setcond_i32/i64 cond, dest, t1, t2
+
+dest = (t1 cond t2)
+
+Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+
+* movcond_i32/i64 cond, dest, c1, c2, vtrue, vfalse
+
+dest = (c1 cond c2 ? vtrue : vfalse)
+
+Set DEST to VTRUE if (C1 cond C2) is true, otherwise set to VFALSE.
+
+* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+
+Similar to setcond, except that the 64-bit values T1 and T2 are
+formed from two 32-bit arguments. The result is a 32-bit value.
+
********* Type conversions
* ext_i32_i64 t0, t1
@@ -375,7 +399,7 @@ The target word size (TCG_TARGET_REG_BITS) is expected to
be 32 bit or
On a 32 bit target, all 64 bit operations are converted to 32 bits. A
few specific operations must be implemented to allow it (see add2_i32,
-sub2_i32, brcond2_i32).
+sub2_i32, brcond2_i32, setcond2_i32).
Floating point operations are not supported in this version. A
previous incarnation of the code generator had full support of them,
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index faf2e8b..f43ed16 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -280,6 +280,32 @@ static inline void tcg_gen_op6_i64(int opc, TCGv_i64 arg1,
TCGv_i64 arg2,
*gen_opparam_ptr++ = GET_TCGV_I64(arg6);
}
+static inline void tcg_gen_op6i_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
+ TCGv_i32 arg3, TCGv_i32 arg4,
+ TCGv_i32 arg5, TCGArg arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+ *gen_opparam_ptr++ = arg6;
+}
+
+static inline void tcg_gen_op6i_i64(int opc, TCGv_i64 arg1, TCGv_i64 arg2,
+ TCGv_i64 arg3, TCGv_i64 arg4,
+ TCGv_i64 arg5, TCGArg arg6)
+{
+ *gen_opc_ptr++ = opc;
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+ *gen_opparam_ptr++ = arg6;
+}
+
static inline void tcg_gen_op6ii_i32(int opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5,
TCGArg arg6)
@@ -1795,6 +1821,67 @@ static inline void tcg_gen_rotri_i64(TCGv_i64 ret,
TCGv_i64 arg1, int64_t arg2)
}
}
+static inline void tcg_gen_setcond_i32(int cond, TCGv_i32 ret,
+ TCGv_i32 arg1, TCGv_i32 arg2)
+{
+ tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+}
+
+static inline void tcg_gen_setcond_i64(int cond, TCGv_i64 ret,
+ TCGv_i64 arg1, TCGv_i64 arg2)
+{
+#if TCG_TARGET_REG_BITS == 64
+ tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+#else
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+#endif
+}
+
+static inline void tcg_gen_movcond_i32(int cond, TCGv_i32 ret,
+ TCGv_i32 cmp1, TCGv_i32 cmp2,
+ TCGv_i32 op_t, TCGv_i32 op_f)
+{
+ if (TCGV_EQUAL_I32(op_t, op_f)) {
+ tcg_gen_mov_i32(ret, op_t);
+ return;
+ }
+ tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, cmp1, cmp2, op_t, op_f, cond);
+}
+
+static inline void tcg_gen_movcond_i64(int cond, TCGv_i64 ret,
+ TCGv_i64 cmp1, TCGv_i64 cmp2,
+ TCGv_i64 op_t, TCGv_i64 op_f)
+{
+ if (TCGV_EQUAL_I64(op_t, op_f)) {
+ tcg_gen_mov_i64(ret, op_t);
+ return;
+ }
+#if TCG_TARGET_REG_BITS == 64
+ tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, cmp1, cmp2, op_t, op_f, cond);
+#else
+ {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 zero = tcg_const_i32(0);
+
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+ TCGV_LOW(cmp1), TCGV_HIGH(cmp1),
+ TCGV_LOW(cmp2), TCGV_HIGH(cmp2), cond);
+
+ /* ??? We could perhaps conditionally define a movcond2_i32. */
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, zero,
+ TCGV_LOW(op_t), TCGV_LOW(op_f));
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, zero,
+ TCGV_HIGH(op_t), TCGV_HIGH(op_f));
+
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(zero);
+ }
+#endif
+}
+
/***************************************/
/* QEMU specific operations. Their type depend on the QEMU CPU
type. */
@@ -2067,6 +2154,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv
addr, int mem_index)
#define tcg_gen_sari_tl tcg_gen_sari_i64
#define tcg_gen_brcond_tl tcg_gen_brcond_i64
#define tcg_gen_brcondi_tl tcg_gen_brcondi_i64
+#define tcg_gen_setcond_tl tcg_gen_setcond_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
#define tcg_gen_mul_tl tcg_gen_mul_i64
#define tcg_gen_muli_tl tcg_gen_muli_i64
#define tcg_gen_div_tl tcg_gen_div_i64
@@ -2137,6 +2226,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv
addr, int mem_index)
#define tcg_gen_sari_tl tcg_gen_sari_i32
#define tcg_gen_brcond_tl tcg_gen_brcond_i32
#define tcg_gen_brcondi_tl tcg_gen_brcondi_i32
+#define tcg_gen_setcond_tl tcg_gen_setcond_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
#define tcg_gen_mul_tl tcg_gen_mul_i32
#define tcg_gen_muli_tl tcg_gen_muli_i32
#define tcg_gen_div_tl tcg_gen_div_i32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index b7f3fd7..086968c 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -42,6 +42,8 @@ DEF2(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
DEF2(mov_i32, 1, 1, 0, 0)
DEF2(movi_i32, 1, 0, 1, 0)
+DEF2(setcond_i32, 1, 2, 1, 0)
+DEF2(movcond_i32, 1, 4, 1, 0)
/* load/store */
DEF2(ld8u_i32, 1, 1, 1, 0)
DEF2(ld8s_i32, 1, 1, 1, 0)
@@ -82,6 +84,7 @@ DEF2(add2_i32, 2, 4, 0, 0)
DEF2(sub2_i32, 2, 4, 0, 0)
DEF2(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
DEF2(mulu2_i32, 2, 2, 0, 0)
+DEF2(setcond2_i32, 1, 4, 1, 0)
#endif
#ifdef TCG_TARGET_HAS_ext8s_i32
DEF2(ext8s_i32, 1, 1, 0, 0)
@@ -111,6 +114,8 @@ DEF2(neg_i32, 1, 1, 0, 0)
#if TCG_TARGET_REG_BITS == 64
DEF2(mov_i64, 1, 1, 0, 0)
DEF2(movi_i64, 1, 0, 1, 0)
+DEF2(setcond_i64, 1, 2, 1, 0)
+DEF2(movcond_i64, 1, 4, 1, 0)
/* load/store */
DEF2(ld8u_i64, 1, 1, 1, 0)
DEF2(ld8s_i64, 1, 1, 1, 0)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 3c0e296..f7ea727 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -670,6 +670,7 @@ void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
}
#endif
+
static void tcg_reg_alloc_start(TCGContext *s)
{
int i;
@@ -888,21 +889,31 @@ void tcg_dump_ops(TCGContext *s, FILE *outfile)
fprintf(outfile, "%s",
tcg_get_arg_str_idx(s, buf, sizeof(buf), args[k++]));
}
- if (c == INDEX_op_brcond_i32
+ switch (c) {
+ case INDEX_op_brcond_i32:
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_brcond2_i32:
+#elif TCG_TARGET_REG_BITS == 64
+ case INDEX_op_brcond_i64:
+#endif
+ case INDEX_op_setcond_i32:
+ case INDEX_op_movcond_i32:
#if TCG_TARGET_REG_BITS == 32
- || c == INDEX_op_brcond2_i32
+ case INDEX_op_setcond2_i32:
#elif TCG_TARGET_REG_BITS == 64
- || c == INDEX_op_brcond_i64
+ case INDEX_op_setcond_i64:
+ case INDEX_op_movcond_i64:
#endif
- ) {
if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]])
fprintf(outfile, ",%s", cond_name[args[k++]]);
else
fprintf(outfile, ",$0x%" TCG_PRIlx, args[k++]);
i = 1;
- }
- else
+ break;
+ default:
i = 0;
+ break;
+ }
for(; i < nb_cargs; i++) {
if (k != 0)
fprintf(outfile, ",");
commit eb326c39503ff265dc83e8792e87e6308d9dfd71
Author: Richard Henderson <r...@twiddle.net>
Date: Fri Dec 18 11:51:35 2009 -0800
tcg-x86_64: Implement setcond and movcond.
Implement conditional moves in the x86_64 backend.
Signed-off-by: Richard Henderson <r...@twiddle.net>
diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
index 2339091..1f86946 100644
--- a/tcg/x86_64/tcg-target.c
+++ b/tcg/x86_64/tcg-target.c
@@ -491,9 +491,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int
label_index)
}
}
-static void tcg_out_brcond(TCGContext *s, int cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index, int rexw)
+static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1,
+ TCGArg arg2, int const_arg2, int rexw)
{
if (const_arg2) {
if (arg2 == 0) {
@@ -508,9 +507,51 @@ static void tcg_out_brcond(TCGContext *s, int cond,
} else {
tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3) | rexw, arg2, arg1);
}
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ int label_index, int rexw)
+{
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
}
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg dest,
+ TCGArg arg1, TCGArg arg2, int const_arg2, int rexw)
+{
+ int use_xor = (dest != arg1 && (const_arg2 || dest != arg2));
+
+ if (use_xor)
+ tcg_out_movi(s, TCG_TYPE_I32, dest, 0);
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2, rexw);
+ /* setcc */
+ tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT | P_REXB, 0, dest);
+ if (!use_xor)
+ tgen_arithi32(s, ARITH_AND, dest, 0xff);
+}
+
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg dest,
+ TCGArg cmp1, TCGArg cmp2, int const_cmp2,
+ TCGArg vtrue, TCGArg vfalse, int rexw)
+{
+ if (vtrue == vfalse) {
+ tcg_out_mov(s, dest, vtrue);
+ return;
+ }
+ if (dest == vtrue) {
+ cond = tcg_invert_cond(cond);
+ vtrue = vfalse;
+ vfalse = dest;
+ }
+
+ tcg_out_cond(s, cond, cmp1, cmp2, const_cmp2, rexw);
+ if (dest != vfalse)
+ tcg_out_mov(s, dest, vfalse);
+ /* cmovcc */
+ tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT | rexw, dest, vtrue);
+}
+
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
@@ -1197,6 +1238,24 @@ static inline void tcg_out_op(TCGContext *s, int opc,
const TCGArg *args,
tcg_out_modrm(s, 0x8b, args[0], args[1]);
break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+ const_args[2], 0);
+ break;
+ case INDEX_op_setcond_i64:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2],
+ const_args[2], P_REXW);
+ break;
+
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ const_args[2], args[3], args[4], 0);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ const_args[2], args[3], args[4], P_REXW);
+ break;
+
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
break;
@@ -1376,6 +1435,12 @@ static const TCGTargetOpDef x86_64_op_defs[] = {
{ INDEX_op_ext16u_i64, { "r", "r"} },
{ INDEX_op_ext32u_i64, { "r", "r"} },
+ { INDEX_op_setcond_i32, { "r", "r", "ri" } },
+ { INDEX_op_setcond_i64, { "r", "r", "re" } },
+
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } },
+ { INDEX_op_movcond_i64, { "r", "r", "re", "r", "r" } },
+
{ INDEX_op_qemu_ld8u, { "r", "L" } },
{ INDEX_op_qemu_ld8s, { "r", "L" } },
{ INDEX_op_qemu_ld16u, { "r", "L" } },
commit 798a4a5bfc491fef81cf968cd0039ad60f34366a
Author: Richard Henderson <r...@twiddle.net>
Date: Fri Dec 18 13:01:30 2009 -0800
tcg-i386: Implement small forward branches.
There are places, like brcond2, where we know that the destination
of a forward branch will be within 127 bytes.
Add the R_386_PC8 relocation type to support this. Add a flag to
tcg_out_jxx and tcg_out_brcond* to enable it. Set the flag in the
brcond2 label_next branches; pass along the input flag otherwise.
Signed-off-by: Richard Henderson <r...@twiddle.net>
diff --git a/elf.h b/elf.h
index 11674d7..c84c8ab 100644
--- a/elf.h
+++ b/elf.h
@@ -243,6 +243,8 @@ typedef struct {
#define R_386_GOTOFF 9
#define R_386_GOTPC 10
#define R_386_NUM 11
+/* Not a dynamic reloc, so not included in R_386_NUM. Used in TCG. */
+#define R_386_PC8 23
#define R_MIPS_NONE 0
#define R_MIPS_16 1
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 972b102..48c9bc8 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -61,6 +61,12 @@ static void patch_reloc(uint8_t *code_ptr, int type,
case R_386_PC32:
*(uint32_t *)code_ptr = value - (long)code_ptr;
break;
+ case R_386_PC8:
+ value -= (long)code_ptr;
+ if (value != (int8_t)value)
+ tcg_abort();
+ *(uint8_t *)code_ptr = value;
+ break;
default:
tcg_abort();
}
@@ -305,7 +311,8 @@ static void tcg_out_addi(TCGContext *s, int reg,
tcg_target_long val)
tgen_arithi(s, ARITH_ADD, reg, val, 0);
}
-static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
+/* Use SMALL != 0 to force a short forward branch. */
+static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
int32_t val, val1;
TCGLabel *l = &s->labels[label_index];
@@ -320,6 +327,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int
label_index)
tcg_out8(s, 0x70 + opc);
tcg_out8(s, val1);
} else {
+ if (small)
+ tcg_abort();
if (opc == -1) {
tcg_out8(s, 0xe9);
tcg_out32(s, val - 5);
@@ -329,6 +338,14 @@ static void tcg_out_jxx(TCGContext *s, int opc, int
label_index)
tcg_out32(s, val - 6);
}
}
+ } else if (small) {
+ if (opc == -1) {
+ tcg_out8(s, 0xeb);
+ } else {
+ tcg_out8(s, 0x70 + opc);
+ }
+ tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
+ s->code_ptr += 1;
} else {
if (opc == -1) {
tcg_out8(s, 0xe9);
@@ -343,7 +360,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int
label_index)
static void tcg_out_brcond(TCGContext *s, int cond,
TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index)
+ int label_index, int small)
{
if (const_arg2) {
if (arg2 == 0) {
@@ -355,64 +372,84 @@ static void tcg_out_brcond(TCGContext *s, int cond,
} else {
tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
}
- tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
/* XXX: we implement it at the target level to avoid having to
handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s,
- const TCGArg *args, const int *const_args)
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args, int small)
{
int label_next;
label_next = gen_new_label();
switch(args[4]) {
case TCG_COND_EQ:
- tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
label_next);
- tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3],
args[5]);
+ tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
+ label_next, small);
+ tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3],
+ args[5], small);
break;
case TCG_COND_NE:
- tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
args[5]);
- tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3],
args[5]);
+ tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2],
+ args[5], small);
+ tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3],
+ args[5], small);
break;
case TCG_COND_LT:
- tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
args[5]);
+ tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_LE:
- tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
args[5]);
+ tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_GT:
- tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
args[5]);
+ tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_GE:
- tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
args[5]);
+ tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_LTU:
- tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
args[5]);
+ tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_LEU:
- tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
args[5]);
+ tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_GTU:
- tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
args[5]);
+ tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2],
+ args[5], small);
break;
case TCG_COND_GEU:
- tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
args[5]);
- tcg_out_jxx(s, JCC_JNE, label_next);
- tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
args[5]);
+ tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3],
+ args[5], small);
+ tcg_out_jxx(s, JCC_JNE, label_next, 1);
+ tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2],
+ args[5], small);
break;
default:
tcg_abort();
@@ -913,7 +950,7 @@ static inline void tcg_out_op(TCGContext *s, int opc,
}
break;
case INDEX_op_br:
- tcg_out_jxx(s, JCC_JMP, args[0]);
+ tcg_out_jxx(s, JCC_JMP, args[0], 0);
break;
case INDEX_op_movi_i32:
tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
@@ -1044,10 +1081,11 @@ static inline void tcg_out_op(TCGContext *s, int opc,
tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
break;
case INDEX_op_brcond_i32:
- tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+ tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ args[3], 0);
break;
case INDEX_op_brcond2_i32:
- tcg_out_brcond2(s, args, const_args);
+ tcg_out_brcond2(s, args, const_args, 0);
break;
case INDEX_op_bswap16_i32:
commit b2c6cc4c5efa898b53653a35dd957dc653513a29
Author: Richard Henderson <r...@twiddle.net>
Date: Fri Dec 18 13:29:53 2009 -0800
tcg-i386: Implement setcond, movcond, setcond2.
Implement conditional moves in the i386 backend.
Signed-off-by: Richard Henderson <r...@twiddle.net>
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 48c9bc8..cec0663 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -358,9 +358,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int
label_index, int small)
}
}
-static void tcg_out_brcond(TCGContext *s, int cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index, int small)
+static void tcg_out_cond(TCGContext *s, int cond, TCGArg arg1,
+ TCGArg arg2, int const_arg2)
{
if (const_arg2) {
if (arg2 == 0) {
@@ -372,6 +371,13 @@ static void tcg_out_brcond(TCGContext *s, int cond,
} else {
tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
}
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ int label_index, int small)
+{
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2);
tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
@@ -457,6 +463,168 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg
*args,
tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+ TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+ int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2));
+
+ if (use_xor)
+ tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
+ tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+ /* setcc */
+ tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, arg0);
+ if (!use_xor)
+ tgen_arithi(s, ARITH_AND, arg0, 0xff, 0);
+}
+
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ TCGArg new_args[6];
+ int label_true, label_over;
+
+ memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+ if (args[0] == args[1] || args[0] == args[2]
+ || (!const_args[3] && args[0] == args[3])
+ || (!const_args[4] && args[0] == args[4])) {
+ /* When the destination overlaps with one of the argument
+ registers, don't do anything tricky. */
+ label_true = gen_new_label();
+ label_over = gen_new_label();
+
+ new_args[5] = label_true;
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+ tcg_out_jxx(s, JCC_JMP, label_over, 1);
+ tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+ tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+ } else {
+ /* When the destination does not overlap one of the arguments,
+ clear the destination first, jump if cond false, and emit an
+ increment in the true case. This results in smaller code. */
+
+ tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+ label_over = gen_new_label();
+ new_args[4] = tcg_invert_cond(new_args[4]);
+ new_args[5] = label_over;
+ tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+ tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+ tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+ }
+}
+
+static inline int have_cmov(void)
+{
+#ifdef __i686__
+ /* Compiler options say that cmov is available. */
+ return 1;
+#else
+ /* ??? Use cpuid or something and figure out what's running. */
+ return 0;
+#endif
+}
+
+static void tcg_out_movcond(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ int vtc, vfc, cond, use_cmov = 0, do_swap = 0;
+ TCGArg d, vt, vf;
+
+ d = args[0];
+ vt = args[3];
+ vf = args[4];
+ vtc = const_args[3];
+ vfc = const_args[4];
+
+ /* ??? The jcc code path below assumes that one mov insn must be skipped.
+ Rather than complicate the code below, make sure to simplify the
+ conditional move here. */
+ if (vtc == vfc && vt == vf) {
+ if (vtc)
+ tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+ else
+ tcg_out_mov(s, d, vt);
+ return;
+ }
+
+ cond = args[5];
+
+ /* If both arguments are constants, we *could* do all the funny bits that
+ gcc does with sbc, masks, etc. There's likely no point. Just use the
+ jcc version in this case. We also have to be careful about clobbering
+ inputs when trying to move constants into position. */
+
+ if (have_cmov()) {
+ use_cmov = 1;
+ if (vtc) {
+ if (vfc || d == vf)
+ use_cmov = 0;
+ else
+ do_swap = 1;
+ } else if (d == vt) {
+ if (vfc)
+ use_cmov = 0;
+ else
+ do_swap = 1;
+ }
+ }
+
+ if (!use_cmov) {
+ /* We're going to follow the lead of cmov and set D=VF first,
+ which means inverting the condition upon which we jump. */
+ cond = tcg_invert_cond(cond);
+
+ /* Don't allow the move we jump over to be a nop. */
+ do_swap = (!vtc && d == vt);
+ }
+
+ if (do_swap) {
+ TCGArg t;
+ cond = tcg_invert_cond(cond);
+ t = vf, vf = vt, vt = t;
+ t = vfc, vfc = vtc, vtc = t;
+ }
+
+ /* If possible, set D=0 before the compare, so that we can use XOR. */
+ if (vfc && vf == 0 && d != args[1] && (const_args[2] || d != args[2])) {
+ tcg_out_movi(s, TCG_TYPE_I32, d, 0);
+ vf = d, vfc = 0;
+ }
+
+ tcg_out_cond(s, cond, args[1], args[2], const_args[2]);
+
+ if (vfc) {
+ /* Force the use of "mov $0, d" to avoid clobbering flags. */
+ tcg_out8(s, 0xb8 + d);
+ tcg_out32(s, vf);
+ } else {
+ tcg_out_mov(s, d, vf);
+ }
+
+ if (use_cmov) {
+ if (vtc)
+ tcg_abort();
+ /* cmovcc */
+ tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT, d, vt);
+ } else {
+ int label_next = gen_new_label();
+
+ tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_next, 1);
+ if (vtc)
+ tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+ else
+ tcg_out_mov(s, d, vt);
+
+ tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+ }
+}
+
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
@@ -1118,6 +1286,16 @@ static inline void tcg_out_op(TCGContext *s, int opc,
tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
break;
+ case INDEX_op_setcond_i32:
+ tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+ break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args, const_args);
+ break;
+ case INDEX_op_setcond2_i32:
+ tcg_out_setcond2(s, args, const_args);
+ break;
+
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
break;
@@ -1206,6 +1384,10 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_ext8u_i32, { "r", "q"} },
{ INDEX_op_ext16u_i32, { "r", "r"} },
+ { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "ri", "ri" } },
+ { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+
#if TARGET_LONG_BITS == 32
{ INDEX_op_qemu_ld8u, { "r", "L" } },
{ INDEX_op_qemu_ld8s, { "r", "L" } },