Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- target/i386/tcg/decode-new.c.inc | 160 ++++++++++++++++++++++++++++++- target/i386/tcg/decode-new.h | 32 +++++++ target/i386/tcg/emit.c.inc | 34 ++++++- target/i386/tcg/translate.c | 17 ++-- 4 files changed, 232 insertions(+), 11 deletions(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index a9b8b6c05f..f6c032c694 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -91,6 +91,23 @@ #define zext0 .special = X86_SPECIAL_ZExtOp0, #define zext2 .special = X86_SPECIAL_ZExtOp2, +#define vex1 .vex_class = 1, +#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar, +#define vex2 .vex_class = 2, +#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar, +#define vex3 .vex_class = 3, +#define vex4 .vex_class = 4, +#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned, +#define vex5 .vex_class = 5, +#define vex6 .vex_class = 6, +#define vex7 .vex_class = 7, +#define vex8 .vex_class = 8, +#define vex11 .vex_class = 11, +#define vex12 .vex_class = 12, +#define vex13 .vex_class = 13, + +#define avx2_256 .vex_special = X86_VEX_AVX2_256, + static uint8_t get_modrm(DisasContext *s, CPUX86State *env) { if (!s->has_modrm) { @@ -155,6 +172,18 @@ static const X86OpEntry opcodes_root[256] = { }; #undef mmx +#undef vex1 +#undef vex2 +#undef vex3 +#undef vex4 +#undef vex4_unal +#undef vex5 +#undef vex6 +#undef vex7 +#undef vex8 +#undef vex11 +#undef vex12 +#undef vex13 /* * Decode the fixed part of the opcode and place the last @@ -553,6 +582,132 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) } } +static bool validate_vex(DisasContext *s, X86DecodedInsn *decode) +{ + X86OpEntry *e = &decode->e; + + switch (e->vex_special) { + case X86_VEX_REPScalar: + /* + * Instructions which differ between 00/66 and F2/F3 in the + * exception classification and the size of the memory operand. + */ + assert(e->vex_class == 1 || e->vex_class == 2); + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + e->vex_class = 3; + if (s->vex_l) { + goto illegal; + } + assert(decode->e.s2 == X86_SIZE_x); + if (decode->op[2].has_ea) { + decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64; + } + } + break; + + case X86_VEX_SSEUnaligned: + /* handled in sse_needs_alignment. */ + break; + + case X86_VEX_AVX2_256: + if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) { + goto illegal; + } + } + + /* TODO: instructions that require VEX.W=0 (Table 2-16) */ + + switch (e->vex_class) { + case 0: + if (s->prefix & PREFIX_VEX) { + goto illegal; + } + return true; + case 1: + case 2: + case 3: + case 4: + case 5: + case 7: + if (s->prefix & PREFIX_VEX) { + if (!(s->flags & HF_AVX_EN_MASK)) { + goto illegal; + } + } else { + if (!(s->flags & HF_OSFXSR_MASK)) { + goto illegal; + } + } + break; + case 12: + assert(s->has_modrm); + /* Must have a VSIB byte and no address prefix. */ + if ((s->modrm & 7) != 4 || s->aflag == MO_16) { + goto illegal; + } + /* Check no overlap between registers. */ + if (decode->op[0].unit == decode->op[1].unit && decode->op[0].n == decode->op[1].n) { + goto illegal; + } + if (decode->op[0].unit == X86_OP_SSE && decode->op[0].n == decode->mem.index) { + goto illegal; + } + if (decode->op[1].unit == X86_OP_SSE && decode->op[1].n == decode->mem.index) { + goto illegal; + } + /* fall through */ + case 6: + case 11: + if (!(s->prefix & PREFIX_VEX)) { + goto illegal; + } + if (!(s->flags & HF_AVX_EN_MASK)) { + goto illegal; + } + break; + case 8: + if (!(s->prefix & PREFIX_VEX)) { + /* EMMS */ + return true; + } + if (!(s->flags & HF_AVX_EN_MASK)) { + goto illegal; + } + break; + case 13: + if (!(s->prefix & PREFIX_VEX)) { + goto illegal; + } + if (s->vex_l) { + goto illegal; + } + /* All integer instructions use VEX.vvvv, so exit. */ + return true; + } + + if (s->vex_v != 0 && + e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B && + e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B && + e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) { + goto illegal; + } + + if (s->flags & HF_TS_MASK) { + goto nm_exception; + } + if (s->flags & HF_EM_MASK) { + goto illegal; + } + return true; + +nm_exception: + gen_NM_exception(s); + return false; +illegal: + gen_illegal_opcode(s); + return false; +} + /* convert one instruction. s->base.is_jmp is set if the translation must be stopped. Return the next pc value */ static target_ulong disas_insn_new(DisasContext *s, CPUState *cpu, int b) @@ -789,8 +944,11 @@ static target_ulong disas_insn_new(DisasContext *s, CPUState *cpu, int b) break; } + if (!validate_vex(s, &decode)) { + return s->pc; + } if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) { - gen_load_ea(s, &decode.mem); + gen_load_ea(s, &decode.mem, decode.e.vex_class == 12); } if (s->prefix & PREFIX_LOCK) { if (decode.op[0].unit != X86_OP_INT || !decode.op[0].has_ea) { diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index 6fb2d9151e..b5299d0dd2 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -152,6 +152,36 @@ typedef enum X86InsnSpecial { X86_SPECIAL_o64, } X86InsnSpecial; +/* + * Special cases for instructions that operate on XMM/YMM registers. Intel + * retconned all of them to have VEX exception classes other than 0 and 13, so + * all these only matter for instructions that have a VEX exception class. + * Based on tables in the "AVX and SSE Instruction Exception Specification" + * section of the manual. + */ +typedef enum X86VEXSpecial { + /* Legacy SSE instructions that allow unaligned operands */ + X86_VEX_SSEUnaligned, + + /* + * Used for instructions that distinguish the XMM operand type with an + * instruction prefix; legacy SSE encodings will allow unaligned operands + * for scalar operands only (identified by a REP prefix). In this case, + * the decoding table uses "x" for the vector operands instead of specifying + * pd/ps/sd/ss individually. + */ + X86_VEX_REPScalar, + + /* + * VEX instructions that only support 256-bit operands with AVX2 (Table 2-17 + * column 3). Columns 2 and 4 (instructions limited to 256- and 127-bit + * operands respectively) are implicit in the presence of dq and qq + * operands, and thus handled by decode_op_size. + */ + X86_VEX_AVX2_256, +} X86VEXSpecial; + + typedef struct X86OpEntry X86OpEntry; typedef struct X86DecodedInsn X86DecodedInsn; @@ -180,6 +210,8 @@ struct X86OpEntry { X86InsnSpecial special : 8; X86CPUIDFeature cpuid : 8; + uint8_t vex_class : 8; + X86VEXSpecial vex_special : 8; bool is_decode : 1; }; diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 6fa0062d6a..ce0205e05a 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,14 +19,19 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ +static void gen_NM_exception(DisasContext *s) +{ + gen_exception(s, EXCP07_PREX, s->pc_start - s->cs_base); +} + static void gen_illegal(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { gen_illegal_opcode(s); } -static void gen_load_ea(DisasContext *s, AddressParts *mem) +static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib) { - TCGv ea = gen_lea_modrm_1(s, *mem); + TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib); gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override); } @@ -102,6 +107,25 @@ static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_ofs) } } +static inline bool sse_needs_alignment(DisasContext *s, X86DecodedInsn *decode, X86DecodedOp *op) +{ + switch (decode->e.vex_class) { + case 2: + case 4: + if ((s->prefix & PREFIX_VEX) || + decode->e.vex_special == X86_VEX_SSEUnaligned) { + /* MOST legacy SSE instructions require aligned memory operands, but not all. */ + return false; + } + /* fall through */ + case 1: + return op->has_ea && op->ot >= MO_128; + + default: + return false; + } +} + static void gen_load(DisasContext *s, TCGv v, TCGv_ptr ptr, X86DecodedOp *op, uint64_t imm) { switch (op->unit) { @@ -175,7 +199,13 @@ static void gen_writeback(DisasContext *s, X86DecodedOp *op) } break; case X86_OP_MMX: + break; case X86_OP_SSE: + if ((s->prefix & PREFIX_VEX) && op->ot == MO_128) { + tcg_gen_gvec_dup_imm(MO_64, + offsetof(CPUX86State, xmm_regs[op->n].ZMM_X(1)), + 16, 16, 0); + } break; default: abort(); diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a92ef61527..4ecf75ede3 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2217,11 +2217,11 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s, } /* Compute the address, with a minimum number of TCG ops. */ -static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a) +static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a, bool is_vsib) { TCGv ea = NULL; - if (a.index >= 0) { + if (a.index >= 0 && !is_vsib) { if (a.scale == 0) { ea = cpu_regs[a.index]; } else { @@ -2249,7 +2249,7 @@ static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a) static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm) { AddressParts a = gen_lea_modrm_0(env, s, modrm); - TCGv ea = gen_lea_modrm_1(s, a); + TCGv ea = gen_lea_modrm_1(s, a, false); gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override); } @@ -2262,7 +2262,8 @@ static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm) static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm, TCGCond cond, TCGv_i64 bndv) { - TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm)); + AddressParts a = gen_lea_modrm_0(env, s, modrm); + TCGv ea = gen_lea_modrm_1(s, a, false); tcg_gen_extu_tl_i64(s->tmp1_i64, ea); if (!CODE64(s)) { @@ -5953,7 +5954,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) reg = ((modrm >> 3) & 7) | REX_R(s); { AddressParts a = gen_lea_modrm_0(env, s, modrm); - TCGv ea = gen_lea_modrm_1(s, a); + TCGv ea = gen_lea_modrm_1(s, a, false); gen_lea_v_seg(s, s->aflag, ea, -1, -1); gen_op_mov_reg_v(s, dflag, reg, s->A0); } @@ -6180,7 +6181,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (mod != 3) { /* memory op */ AddressParts a = gen_lea_modrm_0(env, s, modrm); - TCGv ea = gen_lea_modrm_1(s, a); + TCGv ea = gen_lea_modrm_1(s, a, false); TCGv last_addr = tcg_temp_new(); bool update_fdp = true; @@ -7210,7 +7211,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exts(ot, s->T1); tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot); tcg_gen_shli_tl(s->tmp0, s->tmp0, ot); - tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0); + tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a, false), s->tmp0); gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); if (!(s->prefix & PREFIX_LOCK)) { gen_op_ld_v(s, ot, s->T0, s->A0); @@ -8281,7 +8282,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* rip-relative generates #ud */ goto illegal_op; } - tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a)); + tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a, false)); if (!CODE64(s)) { tcg_gen_ext32u_tl(s->A0, s->A0); } -- 2.37.2