On 2015-07-16 22:25, Richard Henderson wrote: > Removing the ??? comment explaining why it (mostly) worked. > > Signed-off-by: Richard Henderson <r...@twiddle.net> > --- > tcg/i386/tcg-target.c | 105 > +++++++++++++++++++++++++++++++------------------- > 1 file changed, 65 insertions(+), 40 deletions(-) > > diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c > index ff4d9cf..bbe2963 100644 > --- a/tcg/i386/tcg-target.c > +++ b/tcg/i386/tcg-target.c > @@ -1434,8 +1434,8 @@ static inline void setup_guest_base_seg(void) { } > #endif /* SOFTMMU */ > > static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg > datahi, > - TCGReg base, intptr_t ofs, int seg, > - TCGMemOp memop) > + TCGReg base, int index, intptr_t ofs, > + int seg, TCGMemOp memop) > { > const TCGMemOp real_bswap = memop & MO_BSWAP; > TCGMemOp bswap = real_bswap; > @@ -1448,13 +1448,16 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, > TCGReg datalo, TCGReg datahi, > > switch (memop & MO_SSIZE) { > case MO_UB: > - tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs); > + tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo, > + base, index, 0, ofs); > break; > case MO_SB: > - tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, > ofs); > + tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, > + base, index, 0, ofs); > break; > case MO_UW: > - tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); > + tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, > + base, index, 0, ofs); > if (real_bswap) { > tcg_out_rolw_8(s, datalo); > } > @@ -1462,20 +1465,21 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, > TCGReg datalo, TCGReg datahi, > case MO_SW: > if (real_bswap) { > if (have_movbe) { > - tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, > - datalo, base, ofs); > + tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg, > + datalo, base, index, 0, ofs); > } else { > - tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); > + tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo, > + base, index, 0, ofs); > tcg_out_rolw_8(s, datalo); > } > tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); > } else { > - tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg, > - datalo, base, ofs); > + tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg, > + datalo, base, index, 0, ofs); > } > break; > case MO_UL: > - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); > + tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, > ofs); > if (bswap) { > tcg_out_bswap32(s, datalo); > } > @@ -1483,19 +1487,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, > TCGReg datalo, TCGReg datahi, > #if TCG_TARGET_REG_BITS == 64 > case MO_SL: > if (real_bswap) { > - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); > + tcg_out_modrm_sib_offset(s, movop + seg, datalo, > + base, index, 0, ofs); > if (bswap) { > tcg_out_bswap32(s, datalo); > } > tcg_out_ext32s(s, datalo, datalo); > } else { > - tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs); > + tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo, > + base, index, 0, ofs); > } > break; > #endif > case MO_Q: > if (TCG_TARGET_REG_BITS == 64) { > - tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs); > + tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo, > + base, index, 0, ofs); > if (bswap) { > tcg_out_bswap64(s, datalo); > } > @@ -1506,11 +1513,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, > TCGReg datalo, TCGReg datahi, > datahi = t; > } > if (base != datalo) { > - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); > - tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4); > + tcg_out_modrm_sib_offset(s, movop + seg, datalo, > + base, index, 0, ofs); > + tcg_out_modrm_sib_offset(s, movop + seg, datahi, > + base, index, 0, ofs + 4); > } else { > - tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4); > - tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs); > + tcg_out_modrm_sib_offset(s, movop + seg, datahi, > + base, index, 0, ofs + 4); > + tcg_out_modrm_sib_offset(s, movop + seg, datalo, > + base, index, 0, ofs); > } > if (bswap) { > tcg_out_bswap32(s, datalo); > @@ -1553,7 +1564,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg > *args, bool is64) > label_ptr, offsetof(CPUTLBEntry, addr_read)); > > /* TLB Hit. */ > - tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); > + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc); > > /* Record the current context of a load into ldst label */ > add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, > @@ -1562,24 +1573,29 @@ static void tcg_out_qemu_ld(TCGContext *s, const > TCGArg *args, bool is64) > { > int32_t offset = GUEST_BASE; > TCGReg base = addrlo; > + int index = -1; > int seg = 0; > > - /* ??? We assume all operations have left us with register contents > - that are zero extended. So far this appears to be true. If we > - want to enforce this, we can either do an explicit zero-extension > - here, or (if GUEST_BASE == 0, or a segment register is in use) > - use the ADDR32 prefix. For now, do nothing. */
I think we should keep a comment about why we need to use addr32 or zero-extend the value. > if (GUEST_BASE && guest_base_flags) { > seg = guest_base_flags; > offset = 0; > - } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) { > - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); > - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); > - base = TCG_REG_L1; > - offset = 0; > + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { You can write that as (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) > + seg |= P_ADDR32; > + } > + } else if (TCG_TARGET_REG_BITS == 64) { > + if (TARGET_LONG_BITS == 32) { > + tcg_out_ext32u(s, TCG_REG_L0, base); > + base = TCG_REG_L0; > + } > + if (offset != GUEST_BASE) { > + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); > + index = TCG_REG_L1; > + offset = 0; > + } > } > > - tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc); > + tcg_out_qemu_ld_direct(s, datalo, datahi, > + base, index, offset, seg, opc); > } > #endif > } > @@ -1697,19 +1713,28 @@ static void tcg_out_qemu_st(TCGContext *s, const > TCGArg *args, bool is64) > TCGReg base = addrlo; > int seg = 0; > > - /* ??? We assume all operations have left us with register contents > - that are zero extended. So far this appears to be true. If we > - want to enforce this, we can either do an explicit zero-extension > - here, or (if GUEST_BASE == 0, or a segment register is in use) > - use the ADDR32 prefix. For now, do nothing. */ > if (GUEST_BASE && guest_base_flags) { > seg = guest_base_flags; > offset = 0; > - } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) { > - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); > - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); > - base = TCG_REG_L1; > - offset = 0; > + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) { > + seg |= P_ADDR32; > + } > + } else if (TCG_TARGET_REG_BITS == 64) { > + /* ??? Note that we can't use the same SIB addressing scheme > + as for loads, since we require L0 free for bswap. */ > + if (offset != GUEST_BASE) { > + if (TARGET_LONG_BITS == 32) { > + tcg_out_ext32u(s, TCG_REG_L0, base); > + base = TCG_REG_L0; > + } > + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); > + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); > + base = TCG_REG_L1; > + offset = 0; > + } else if (TARGET_LONG_BITS == 32) { > + tcg_out_ext32u(s, TCG_REG_L1, base); > + base = TCG_REG_L1; > + } > } > > tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc); Besides the small nitpicks above: Reviewed-by: Aurelien Jarno <aurel...@aurel32.net> -- Aurelien Jarno GPG: 4096R/1DDD8C9B aurel...@aurel32.net http://www.aurel32.net