On Thu, Jan 14, 2010 at 02:59:51PM -0800, Richard Henderson wrote:
> The existing P_REXB internal opcode flag unconditionally emits
> the REX prefix. Technically it's not needed if the register in
> question is %al, %bl, %cl, %dl.
>
> Eliding the prefix requires splitting the P_REXB flag into two,
> in order to indicate whether the byte register in question is
> in the REG or the R/M field. Within TCG, the byte register is
> in the REG field only for stores.
Thanks, applied.

> Signed-off-by: Richard Henderson <r...@twiddle.net>
> ---
>  tcg/x86_64/tcg-target.c | 46 ++++++++++++++++++++++++++++++----------------
>  1 files changed, 30 insertions(+), 16 deletions(-)
>
> diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
> index 8c7e738..cbaabef 100644
> --- a/tcg/x86_64/tcg-target.c
> +++ b/tcg/x86_64/tcg-target.c
> @@ -217,9 +217,10 @@ static inline int tcg_target_const_match(tcg_target_long val,
>  #define JCC_JLE 0xe
>  #define JCC_JG 0xf
>
> -#define P_EXT 0x100 /* 0x0f opcode prefix */
> -#define P_REXW 0x200 /* set rex.w = 1 */
> -#define P_REXB 0x400 /* force rex use for byte registers */
> +#define P_EXT 0x100 /* 0x0f opcode prefix */
> +#define P_REXW 0x200 /* set rex.w = 1 */
> +#define P_REXB_R 0x400 /* REG field as byte register */
> +#define P_REXB_RM 0x800 /* R/M field as byte register */
>
>  static const uint8_t tcg_cond_to_jcc[10] = {
>      [TCG_COND_EQ] = JCC_JE,
> @@ -234,16 +235,29 @@ static const uint8_t tcg_cond_to_jcc[10] = {
>      [TCG_COND_GTU] = JCC_JA,
>  };
>
> -static inline void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
> +static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
>  {
> -    int rex;
> -    rex = ((opc >> 6) & 0x8) | ((r >> 1) & 0x4) |
> -        ((x >> 2) & 2) | ((rm >> 3) & 1);
> -    if (rex || (opc & P_REXB)) {
> -        tcg_out8(s, rex | 0x40);
> +    int rex = 0;
> +
> +    rex |= (opc & P_REXW) >> 6; /* REX.W */
> +    rex |= (r & 8) >> 1; /* REX.R */
> +    rex |= (x & 8) >> 2; /* REX.X */
> +    rex |= (rm & 8) >> 3; /* REX.B */
> +
> +    /* P_REXB_{R,RM} indicates that the given register is the low byte.
> +       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
> +       as otherwise the encoding indicates %[abcd]h. Note that the values
> +       that are ORed in merely indicate that the REX byte must be present;
> +       those bits get discarded in output. */
> +    rex |= opc & (r >= 4 ? P_REXB_R : 0);
> +    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
> +
> +    if (rex) {
> +        tcg_out8(s, (uint8_t)(rex | 0x40));
>      }
> -    if (opc & P_EXT)
> +    if (opc & P_EXT) {
>          tcg_out8(s, 0x0f);
> +    }
>      tcg_out8(s, opc & 0xff);
>  }
>
> @@ -408,7 +422,7 @@ static inline void tgen_arithi32(TCGContext *s, int c, int r0, int32_t val)
>          tcg_out8(s, val);
>      } else if (c == ARITH_AND && val == 0xffu) {
>          /* movzbl */
> -        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB, r0, r0);
> +        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB_RM, r0, r0);
>      } else if (c == ARITH_AND && val == 0xffffu) {
>          /* movzwl */
>          tcg_out_modrm(s, 0xb7 | P_EXT, r0, r0);
> @@ -776,7 +790,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>      switch(opc) {
>      case 0:
>          /* movzbl */
> -        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB, TCG_REG_RSI, data_reg);
> +        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB_RM, TCG_REG_RSI, data_reg);
>          break;
>      case 1:
>          /* movzwl */
> @@ -829,7 +843,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
>      switch(opc) {
>      case 0:
>          /* movb */
> -        tcg_out_modrm_offset(s, 0x88 | P_REXB, data_reg, r0, offset);
> +        tcg_out_modrm_offset(s, 0x88 | P_REXB_R, data_reg, r0, offset);
>          break;
>      case 1:
>          if (bswap) {
> @@ -964,7 +978,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
>      case INDEX_op_st8_i32:
>      case INDEX_op_st8_i64:
>          /* movb */
> -        tcg_out_modrm_offset(s, 0x88 | P_REXB, args[0], args[1], args[2]);
> +        tcg_out_modrm_offset(s, 0x88 | P_REXB_R, args[0], args[1], args[2]);
>          break;
>      case INDEX_op_st16_i32:
>      case INDEX_op_st16_i64:
> @@ -1161,7 +1175,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
>          break;
>
>      case INDEX_op_ext8s_i32:
> -        tcg_out_modrm(s, 0xbe | P_EXT | P_REXB, args[0], args[1]);
> +        tcg_out_modrm(s, 0xbe | P_EXT | P_REXB_RM, args[0], args[1]);
>          break;
>      case INDEX_op_ext16s_i32:
>          tcg_out_modrm(s, 0xbf | P_EXT, args[0], args[1]);
> @@ -1177,7 +1191,7 @@ static inline void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
>          break;
>      case INDEX_op_ext8u_i32:
>      case INDEX_op_ext8u_i64:
> -        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB, args[0], args[1]);
> +        tcg_out_modrm(s, 0xb6 | P_EXT | P_REXB_RM, args[0], args[1]);
>          break;
>      case INDEX_op_ext16u_i32:
>      case INDEX_op_ext16u_i64:
> --
> 1.6.5.2
>
>

--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurel...@aurel32.net http://www.aurel32.net