In addition to better compile-time detection, perform runtime detection.

Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 tcg/i386/tcg-target.c | 34 +++++++++++++++++++++++++++++++++-
 tcg/i386/tcg-target.h |  5 -----
 2 files changed, 33 insertions(+), 6 deletions(-)
Y'all are right that there's no particularly good method with which
to detect i686 *or later*, and thus cmov support, in gcc.

If one uses -march=native with any processor made in the last 5 years,
one will have at least SSE1 support.  So we can reasonably use that as
a clue.  To fill in the holes, we can do the check at runtime.  That
does involve a tiny amount of runtime overhead, testing a global
variable.  I suspect that this overhead is unmeasurable.

r~

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 6f3ad3c..b333b46 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -97,6 +97,20 @@ static const int tcg_target_call_oarg_regs[] = {
 # define TCG_REG_L1 TCG_REG_EDX
 #endif
 
+/* Attempt to determine at compile-time whether the compiler assumes that
+   cmov is available.  We get 64-bit for free.  P6 (i686) and later include
+   support for cmov, but there is no one preprocessor define that determines
+   this.  Assume that all processors that include sse also support cmov, so
+   that we sorta future-proof this test against new preprocessor defines.  */
+#include <cpuid.h>
+#if (TCG_TARGET_REG_BITS == 64 \
+     || defined(__i686__) || defined(__pentium4__) \
+     || defined(__athlon__) || defined(__SSE__))
+# define have_cmov 1
+#else
+static bool have_cmov;
+#endif
+
 static uint8_t *tb_ret_addr;
 
 static void patch_reloc(uint8_t *code_ptr, int type,
@@ -943,7 +957,14 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                               TCGArg v1)
 {
     tcg_out_cmp(s, c1, c2, const_c2, 0);
-    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+    if (have_cmov) {
+        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+    } else {
+        int over = gen_new_label();
+        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
+        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
+        tcg_out_label(s, over, s->code_ptr);
+    }
 }
 
 #if TCG_TARGET_REG_BITS == 64
@@ -2243,6 +2264,17 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 static void tcg_target_init(TCGContext *s)
 {
+    /* If we could not determine cmov availability at compile time, perform
+       the check at runtime.  99% certainty that we're running on hardware
+       that supports cmov, but we still need to check.  In case cmov is not
+       available, we'll use a small forward branch.  */
+#ifndef have_cmov
+    {
+        unsigned a, b, c, d;
+        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
+    }
+#endif
+
 #if !defined(CONFIG_USER_ONLY)
     /* fail safe */
     if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index dbc6756..450078b 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -90,12 +90,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nand_i32 0
 #define TCG_TARGET_HAS_nor_i32 0
 #define TCG_TARGET_HAS_deposit_i32 1
-#if defined(__x86_64__) || defined(__i686__)
-/* Use cmov only if the compiler is already doing so.  */
 #define TCG_TARGET_HAS_movcond_i32 1
-#else
-#define TCG_TARGET_HAS_movcond_i32 0
-#endif
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64 1
-- 
1.7.11.7
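
P.S. For anyone who wants to poke at the detection logic outside of TCG,
here is a minimal standalone sketch.  It is not part of the patch: the
movcond32() helper, the main() harness, and the cmov-probe.c file name are
mine, and I've swapped the TCG_TARGET_REG_BITS == 64 test for __x86_64__
since the TCG headers aren't available in a standalone build.  The
compile-time half mirrors the #if in tcg-target.c, the runtime half
mirrors tcg_target_init, and the branch-over-move fallback computes the
same "dest = cond ? v1 : dest" that cmov would.

/* cmov-probe.c: build with "gcc -O2 cmov-probe.c" (or -m32 to
   exercise the runtime path on a plain i386 target).  */
#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

/* Compile-time detection, as in tcg-target.c; __x86_64__ stands in
   for TCG_TARGET_REG_BITS == 64 here.  */
#if defined(__x86_64__) || defined(__i686__) || defined(__pentium4__) \
    || defined(__athlon__) || defined(__SSE__)
# define have_cmov 1
#else
static bool have_cmov;
#endif

/* Same semantics as cmov: move v1 into dest only when cond holds.  */
static int movcond32(bool cond, int dest, int v1)
{
    if (have_cmov) {
        /* TCG would emit OPC_CMOVCC here; in C the compiler is free
           to use cmov itself for this expression.  */
        return cond ? v1 : dest;
    }
    /* The fallback: jump over the move on the inverted condition,
       just as tcg_out_movcond32 does with tcg_out_jxx.  */
    if (!cond) {
        goto over;
    }
    dest = v1;
 over:
    return dest;
}

int main(void)
{
#ifndef have_cmov
    /* Runtime detection, as in tcg_target_init.  */
    unsigned a, b, c, d;
    have_cmov = __get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV);
#endif
    printf("cmov %savailable: movcond32(true, 1, 2) = %d\n",
           have_cmov ? "" : "not ", movcond32(true, 1, 2));
    return 0;
}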