Add CHECK_AVX* macros, and use them to validate VEX-encoded AVX instructions
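The checks gate decode on three independent conditions: AVX enablement
(HF_AVX_EN), the VEX.L and VEX.vvvv field values, and, for some opcodes,
AVX2.  As a rough standalone model of that logic (illustrative C only,
not code from this patch; note that translate.c stores VEX.vvvv
complemented, so vex_v == 0 corresponds to an encoded vvvv of 1111b):

    /* Illustrative model of the CHECK_AVX* gating -- plain C, not
     * QEMU code.  The real macros jump to illegal_op instead of
     * returning a flag; the decisions are the same. */
    #include <stdbool.h>
    #include <stdio.h>

    struct ins {
        bool vex;    /* instruction carried a VEX prefix */
        int vex_l;   /* VEX.L: 0 = 128-bit (xmm), 1 = 256-bit (ymm) */
        int vex_v;   /* complemented VEX.vvvv; 0 means vvvv == 1111b */
    };

    /* CHECK_AVX: legacy SSE encodings pass; VEX requires HF_AVX_EN */
    static bool check_avx(struct ins i, bool avx_en)
    {
        return !i.vex || avx_en;
    }

    /* CHECK_AVX_V0_128: additionally require vvvv == 1111b and L == 0 */
    static bool check_avx_v0_128(struct ins i, bool avx_en)
    {
        return check_avx(i, avx_en)
            && (!i.vex || (i.vex_v == 0 && i.vex_l == 0));
    }

    /* CHECK_AVX2_256: 256-bit (ymm) forms additionally require AVX2 */
    static bool check_avx2_256(struct ins i, bool avx2)
    {
        return !i.vex_l || avx2;
    }

    int main(void)
    {
        struct ins v128 = { true, 0, 0 };  /* VEX.128, vvvv = 1111b */
        struct ins v256 = { true, 1, 0 };  /* same op with VEX.L = 1 */
        printf("%d %d\n", check_avx_v0_128(v128, true),
               check_avx_v0_128(v256, true)); /* "1 0": L=1 form #UDs */
        printf("%d\n", check_avx2_256(v256, false)); /* 0: needs AVX2 */
        return 0;
    }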
All AVX instructions require both CPU and OS support; this is
encapsulated by HF_AVX_EN.  Some also require specific values in the
VEX.L and VEX.V fields.  Some (mostly integer operations) also require
AVX2.

Signed-off-by: Paul Brook <p...@nowt.org>
---
 target/i386/tcg/translate.c | 159 +++++++++++++++++++++++++++++++++---
 1 file changed, 149 insertions(+), 10 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 66ba690b7d..2f5cc24e0c 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3185,10 +3185,54 @@ static const struct SSEOpHelper_table7 sse_op_table7[256] = {
         goto illegal_op; \
     } while (0)
 
+/*
+ * VEX encodings require AVX.
+ * Allow legacy SSE encodings even if AVX is not enabled.
+ */
+#define CHECK_AVX(s) do { \
+        if ((s->prefix & PREFIX_VEX) \
+            && !(env->hflags & HF_AVX_EN_MASK)) \
+            goto illegal_op; \
+    } while (0)
+
+/* If a VEX prefix is used then it must have V=1111b */
+#define CHECK_AVX_V0(s) do { \
+        CHECK_AVX(s); \
+        if ((s->prefix & PREFIX_VEX) && (s->vex_v != 0)) \
+            goto illegal_op; \
+    } while (0)
+
+/* If a VEX prefix is used then it must have L=0 */
+#define CHECK_AVX_128(s) do { \
+        CHECK_AVX(s); \
+        if ((s->prefix & PREFIX_VEX) && (s->vex_l != 0)) \
+            goto illegal_op; \
+    } while (0)
+
+/* If a VEX prefix is used then it must have V=1111b and L=0 */
+#define CHECK_AVX_V0_128(s) do { \
+        CHECK_AVX(s); \
+        if ((s->prefix & PREFIX_VEX) && (s->vex_v != 0 || s->vex_l != 0)) \
+            goto illegal_op; \
+    } while (0)
+
+/* 256-bit (ymm) variants require AVX2 */
+#define CHECK_AVX2_256(s) do { \
+        if (s->vex_l && !(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2)) \
+            goto illegal_op; \
+    } while (0)
+
+/* Requires AVX2 and VEX encoding */
+#define CHECK_AVX2(s) do { \
+        if ((s->prefix & PREFIX_VEX) == 0 \
+            || !(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2)) \
+            goto illegal_op; \
+    } while (0)
+
 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     target_ulong pc_start)
 {
-    int b1, op1_offset, op2_offset, is_xmm, val;
+    int b1, op1_offset, op2_offset, is_xmm, val, scalar_op;
     int modrm, mod, rm, reg;
     struct SSEOpHelper_table1 sse_op;
     struct SSEOpHelper_table6 op6;
@@ -3228,15 +3272,18 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
         return;
     }
-    if (s->flags & HF_EM_MASK) {
-    illegal_op:
-        gen_illegal_opcode(s);
-        return;
-    }
-    if (is_xmm
-        && !(s->flags & HF_OSFXSR_MASK)
-        && (b != 0x38 && b != 0x3a)) {
-        goto unknown_op;
+    /* VEX-encoded instructions ignore the EM bit. See also CHECK_AVX */
+    if (!(s->prefix & PREFIX_VEX)) {
+        if (s->flags & HF_EM_MASK) {
+        illegal_op:
+            gen_illegal_opcode(s);
+            return;
+        }
+        if (is_xmm
+            && !(s->flags & HF_OSFXSR_MASK)
+            && (b != 0x38 && b != 0x3a)) {
+            goto unknown_op;
+        }
     }
     if (b == 0x0e) {
         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
@@ -3278,12 +3325,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
     case 0x1e7: /* movntdq */
     case 0x02b: /* movntps */
     case 0x12b: /* movntps */
+        CHECK_AVX_V0(s);
         if (mod == 3)
             goto illegal_op;
         gen_lea_modrm(env, s, modrm);
         gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
         break;
     case 0x3f0: /* lddqu */
+        CHECK_AVX_V0(s);
         if (mod == 3)
             goto illegal_op;
         gen_lea_modrm(env, s, modrm);
@@ -3291,6 +3340,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0x22b: /* movntss */
     case 0x32b: /* movntsd */
+        CHECK_AVX_V0_128(s);
         if (mod == 3)
             goto illegal_op;
         gen_lea_modrm(env, s, modrm);
@@ -3321,6 +3371,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         }
         break;
     case 0x16e: /* movd xmm, ea */
+        CHECK_AVX_V0_128(s);
 #ifdef TARGET_X86_64
         if (s->dflag == MO_64) {
             gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
@@ -3356,6 +3407,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
     case 0x128: /* movapd */
     case 0x16f: /* movdqa xmm, ea */
     case 0x26f: /* movdqu xmm, ea */
+        CHECK_AVX_V0(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
@@ -3367,6 +3419,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0x210: /* movss xmm, ea */
         if (mod != 3) {
+            CHECK_AVX_V0_128(s);
             gen_lea_modrm(env, s, modrm);
             gen_op_ld_v(s, MO_32, s->T0, s->A0);
             tcg_gen_st32_tl(s->T0, cpu_env,
@@ -3379,6 +3432,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_st32_tl(s->T0, cpu_env,
                             offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
         } else {
+            CHECK_AVX_128(s);
             rm = (modrm & 7) | REX_B(s);
             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
                         offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
@@ -3386,6 +3440,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0x310: /* movsd xmm, ea */
         if (mod != 3) {
+            CHECK_AVX_V0_128(s);
             gen_lea_modrm(env, s, modrm);
             gen_ldq_env_A0(s, offsetof(CPUX86State,
                                        xmm_regs[reg].ZMM_Q(0)));
@@ -3395,6 +3450,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             tcg_gen_st32_tl(s->T0, cpu_env,
                             offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
         } else {
+            CHECK_AVX_128(s);
             rm = (modrm & 7) | REX_B(s);
             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
@@ -3402,6 +3458,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0x012: /* movlps */
     case 0x112: /* movlpd */
+        CHECK_AVX_128(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_ldq_env_A0(s, offsetof(CPUX86State,
@@ -3414,6 +3471,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         }
         break;
     case 0x212: /* movsldup */
+        CHECK_AVX_V0(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
@@ -3430,6 +3488,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
         break;
     case 0x312: /* movddup */
+        CHECK_AVX_V0(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_ldq_env_A0(s, offsetof(CPUX86State,
@@ -3444,6 +3503,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0x016: /* movhps */
     case 0x116: /* movhpd */
+        CHECK_AVX_128(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_ldq_env_A0(s, offsetof(CPUX86State,
@@ -3456,6 +3516,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         }
         break;
     case 0x216: /* movshdup */
+        CHECK_AVX_V0(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
@@ -3509,6 +3570,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         }
         break;
     case 0x17e: /* movd ea, xmm */
+        CHECK_AVX_V0_128(s);
 #ifdef TARGET_X86_64
         if (s->dflag == MO_64) {
             tcg_gen_ld_i64(s->T0, cpu_env,
@@ -3523,6 +3585,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         }
         break;
     case 0x27e: /* movq xmm, ea */
+        CHECK_AVX_V0_128(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_ldq_env_A0(s, offsetof(CPUX86State,
@@ -3551,6 +3614,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
     case 0x129: /* movapd */
     case 0x17f: /* movdqa ea, xmm */
     case 0x27f: /* movdqu ea, xmm */
+        CHECK_AVX_V0(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
@@ -3562,11 +3626,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0x211: /* movss ea, xmm */
         if (mod != 3) {
+            CHECK_AVX_V0_128(s);
            gen_lea_modrm(env, s, modrm);
             tcg_gen_ld32u_tl(s->T0, cpu_env,
                              offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
             gen_op_st_v(s, MO_32, s->T0, s->A0);
         } else {
+            CHECK_AVX_128(s);
             rm = (modrm & 7) | REX_B(s);
             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
@@ -3574,10 +3640,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0x311: /* movsd ea, xmm */
         if (mod != 3) {
+            CHECK_AVX_V0_128(s);
             gen_lea_modrm(env, s, modrm);
             gen_stq_env_A0(s, offsetof(CPUX86State,
                                        xmm_regs[reg].ZMM_Q(0)));
         } else {
+            CHECK_AVX_128(s);
             rm = (modrm & 7) | REX_B(s);
             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
@@ -3585,6 +3653,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0x013: /* movlps */
     case 0x113: /* movlpd */
+        CHECK_AVX_V0_128(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_stq_env_A0(s, offsetof(CPUX86State,
@@ -3595,6 +3664,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0x017: /* movhps */
     case 0x117: /* movhpd */
+        CHECK_AVX_V0_128(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_stq_env_A0(s, offsetof(CPUX86State,
@@ -3611,6 +3681,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
     case 0x173:
         val = x86_ldub_code(env, s);
         if (is_xmm) {
+            CHECK_AVX(s);
+            CHECK_AVX2_256(s);
             tcg_gen_movi_tl(s->T0, val);
             tcg_gen_st32_tl(s->T0, cpu_env,
                             offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
@@ -3646,6 +3718,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
         break;
     case 0x050: /* movmskps */
+        CHECK_AVX_V0(s);
         rm = (modrm & 7) | REX_B(s);
         tcg_gen_addi_ptr(s->ptr0, cpu_env,
                          offsetof(CPUX86State,xmm_regs[rm]));
@@ -3653,6 +3726,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
         break;
     case 0x150: /* movmskpd */
+        CHECK_AVX_V0(s);
         rm = (modrm & 7) | REX_B(s);
         tcg_gen_addi_ptr(s->ptr0, cpu_env,
                          offsetof(CPUX86State,xmm_regs[rm]));
@@ -3686,6 +3760,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0x22a: /* cvtsi2ss */
     case 0x32a: /* cvtsi2sd */
+        CHECK_AVX(s);
         ot = mo_64_32(s->dflag);
         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
@@ -3739,6 +3814,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
     case 0x32c: /* cvttsd2si */
     case 0x22d: /* cvtss2si */
     case 0x32d: /* cvtsd2si */
+        CHECK_AVX_V0(s);
         ot = mo_64_32(s->dflag);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
@@ -3773,6 +3849,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0xc4: /* pinsrw */
     case 0x1c4:
+        CHECK_AVX_128(s);
         s->rip_offset = 1;
         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
         val = x86_ldub_code(env, s);
@@ -3789,6 +3866,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         break;
     case 0xc5: /* pextrw */
     case 0x1c5:
+        CHECK_AVX_V0_128(s);
         if (mod != 3)
             goto illegal_op;
         ot = mo_64_32(s->dflag);
@@ -3808,6 +3886,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         gen_op_mov_reg_v(s, ot, reg, s->T0);
         break;
     case 0x1d6: /* movq ea, xmm */
+        CHECK_AVX_V0_128(s);
         if (mod != 3) {
             gen_lea_modrm(env, s, modrm);
             gen_stq_env_A0(s, offsetof(CPUX86State,
@@ -3840,6 +3919,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         if (mod != 3)
             goto illegal_op;
         if (b1) {
+            CHECK_AVX_V0(s);
             rm = (modrm & 7) | REX_B(s);
             tcg_gen_addi_ptr(s->ptr0, cpu_env,
                              offsetof(CPUX86State, xmm_regs[rm]));
@@ -3875,8 +3955,33 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             goto illegal_op;
         }
 
+        if (op6.ext_mask == CPUID_EXT_AVX
+            && (s->prefix & PREFIX_VEX) == 0) {
+            goto illegal_op;
+        }
+        if (op6.flags & SSE_OPF_AVX2) {
+            CHECK_AVX2(s);
+        }
+
         if (b1) {
+            if (op6.flags & SSE_OPF_V0) {
+                CHECK_AVX_V0(s);
+            } else {
+                CHECK_AVX(s);
+            }
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+
+            if (op6.flags & SSE_OPF_MMX) {
+                CHECK_AVX2_256(s);
+            }
+            if (op6.flags & SSE_OPF_BLENDV) {
+                /*
+                 * VEX encodings of the blendv opcodes are not valid;
+                 * they use a different opcode with an 0f 3a prefix.
+                 */
+                CHECK_NO_VEX(s);
+            }
+
             if (mod == 3) {
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
             } else {
@@ -4327,6 +4432,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             val = x86_ldub_code(env, s);
             switch (b) {
             case 0x14: /* pextrb */
+                CHECK_AVX_V0_128(s);
                 tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                         xmm_regs[reg].ZMM_B(val & 15)));
                 if (mod == 3) {
@@ -4337,6 +4443,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 break;
             case 0x15: /* pextrw */
+                CHECK_AVX_V0_128(s);
                 tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                         xmm_regs[reg].ZMM_W(val & 7)));
                 if (mod == 3) {
@@ -4347,6 +4454,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 break;
             case 0x16:
+                CHECK_AVX_V0_128(s);
                 if (ot == MO_32) { /* pextrd */
                     tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                    offsetof(CPUX86State,
@@ -4374,6 +4482,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 break;
             case 0x17: /* extractps */
+                CHECK_AVX_V0_128(s);
                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
                                         xmm_regs[reg].ZMM_L(val & 3)));
                 if (mod == 3) {
@@ -4384,6 +4493,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 break;
             case 0x20: /* pinsrb */
+                CHECK_AVX_128(s);
                 if (mod == 3) {
                     gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                 } else {
@@ -4394,6 +4504,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                         xmm_regs[reg].ZMM_B(val & 15)));
                 break;
             case 0x21: /* insertps */
+                CHECK_AVX_128(s);
                 if (mod == 3) {
                     tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
                                    offsetof(CPUX86State,xmm_regs[rm]
@@ -4423,6 +4534,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                                 xmm_regs[reg].ZMM_L(3)));
                 break;
             case 0x22:
+                CHECK_AVX_128(s);
                 if (ot == MO_32) { /* pinsrd */
                     if (mod == 3) {
                         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
@@ -4453,6 +4565,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             return;
         }
 
+        CHECK_AVX(s);
+        scalar_op = (s->prefix & PREFIX_VEX)
+            && (op7.flags & SSE_OPF_SCALAR)
+            && !(op7.flags & SSE_OPF_CMP);
+        if (is_xmm && (op7.flags & SSE_OPF_MMX)) {
+            CHECK_AVX2_256(s);
+        }
+        if (op7.flags & SSE_OPF_AVX2) {
+            CHECK_AVX2(s);
+        }
+        if ((op7.flags & SSE_OPF_V0) && !scalar_op) {
+            CHECK_AVX_V0(s);
+        }
+
         if (b1) {
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             if (mod == 3) {
@@ -4540,6 +4666,19 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         }
         if (is_xmm) {
+            scalar_op = (s->prefix & PREFIX_VEX)
+                && (sse_op.flags & SSE_OPF_SCALAR)
+                && !(sse_op.flags & SSE_OPF_CMP)
+                && (b1 == 2 || b1 == 3);
+            /* VEX-encoded scalar ops always have 3 operands! */
+            if ((sse_op.flags & SSE_OPF_V0) && !scalar_op) {
+                CHECK_AVX_V0(s);
+            } else {
+                CHECK_AVX(s);
+            }
+            if (sse_op.flags & SSE_OPF_MMX) {
+                CHECK_AVX2_256(s);
+            }
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             if (mod != 3) {
                 int sz = 4;
-- 
2.36.0
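A footnote on HF_AVX_EN, which this patch tests but does not compute:
architecturally, AVX instructions raise #UD unless CR4.OSXSAVE is set
and XCR0 enables both the SSE and AVX state components.  A sketch of
how such a flag can be derived (hypothetical standalone helper, not
part of this patch):

    /* Hypothetical sketch, not QEMU code: one way an HF_AVX_EN-style
     * flag can be computed from guest control state. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CR4_OSXSAVE_BIT  (1u << 18)   /* CR4.OSXSAVE */
    #define XCR0_SSE         (1u << 1)    /* XSAVE-managed SSE state */
    #define XCR0_AVX         (1u << 2)    /* XSAVE-managed AVX state */

    static bool avx_enabled(uint32_t cr4, uint64_t xcr0)
    {
        /* Both OS support and the relevant XCR0 bits must be set */
        return (cr4 & CR4_OSXSAVE_BIT)
            && ((xcr0 & (XCR0_SSE | XCR0_AVX)) == (XCR0_SSE | XCR0_AVX));
    }

    int main(void)
    {
        /* OSXSAVE set, XCR0 = x87|SSE|AVX -> AVX usable, prints 1 */
        printf("%d\n", avx_enabled(CR4_OSXSAVE_BIT, 0x7));
        return 0;
    }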