From: Richard Henderson <r...@twiddle.net> We were loading 16 bytes for both single and double-precision scalar comparisons.
Reported-by: Alexander Bluhm <bl...@openbsd.org> Signed-off-by: Richard Henderson <r...@twiddle.net> (cherry picked from commit cb48da7f8140b5cbb648d990876720da9cd04d8f) Conflicts: target-i386/translate.c *removed dependency on 323d1876 Signed-off-by: Michael Roth <mdr...@linux.vnet.ibm.com> --- target-i386/translate.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index b19ea14..4a25486 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -4565,21 +4565,47 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, if (is_xmm) { op1_offset = offsetof(CPUX86State,xmm_regs[reg]); if (mod != 3) { + int sz = 4; + gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr); op2_offset = offsetof(CPUX86State,xmm_t0); - if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) || - b == 0xc2)) { - /* specific case for SSE single instructions */ + + switch (b) { + case 0x50 ... 0x5a: + case 0x5c ... 0x5f: + case 0xc2: + /* Most sse scalar operations. */ if (b1 == 2) { - /* 32 bit access */ - gen_op_ld_T0_A0(OT_LONG + s->mem_index); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0))); + sz = 2; + } else if (b1 == 3) { + sz = 3; + } + break; + + case 0x2e: /* ucomis[sd] */ + case 0x2f: /* comis[sd] */ + if (b1 == 0) { + sz = 2; } else { - /* 64 bit access */ - gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0))); + sz = 3; } - } else { + break; + } + + switch (sz) { + case 2: + /* 32 bit access */ + gen_op_ld_T0_A0(OT_LONG + s->mem_index); + tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0))); + break; + case 3: + /* 64 bit access */ + gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_D(0))); + break; + default: + /* 128 bit access */ gen_ldo_env_A0(s->mem_index, op2_offset); + break; } } else { rm = (modrm & 7) | REX_B(s); -- 1.9.1