Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -612,18 +612,36 @@ static const struct test avx512_fp16_all
INSN(cmpph, , 0f3a, c2, vl, fp16, vl),
INSN(cmpsh, f3, 0f3a, c2, el, fp16, el),
INSN(comish, , map5, 2f, el, fp16, el),
+ INSN(cvtdq2ph, , map5, 5b, vl, d, vl),
INSN(cvtpd2ph, 66, map5, 5a, vl, q, vl),
+ INSN(cvtph2dq, 66, map5, 5b, vl_2, fp16, vl),
INSN(cvtph2pd, , map5, 5a, vl_4, fp16, vl),
INSN(cvtph2psx, 66, map6, 13, vl_2, fp16, vl),
+ INSN(cvtph2qq, 66, map5, 7b, vl_4, fp16, vl),
+ INSN(cvtph2udq, , map5, 79, vl_2, fp16, vl),
+ INSN(cvtph2uqq, 66, map5, 79, vl_4, fp16, vl),
INSN(cvtph2uw, , map5, 7d, vl, fp16, vl),
INSN(cvtph2w, 66, map5, 7d, vl, fp16, vl),
INSN(cvtps2phx, 66, map5, 1d, vl, d, vl),
+ INSN(cvtqq2ph, , map5, 5b, vl, q, vl),
INSN(cvtsd2sh, f2, map5, 5a, el, q, el),
INSN(cvtsh2sd, f3, map5, 5a, el, fp16, el),
+ INSN(cvtsh2si, f3, map5, 2d, el, fp16, el),
INSN(cvtsh2ss, , map6, 13, el, fp16, el),
+ INSN(cvtsh2usi, f3, map5, 79, el, fp16, el),
+ INSN(cvtsi2sh, f3, map5, 2a, el, dq64, el),
INSN(cvtss2sh, , map5, 1d, el, d, el),
+ INSN(cvttph2dq, f3, map5, 5b, vl_2, fp16, vl),
+ INSN(cvttph2qq, 66, map5, 7a, vl_4, fp16, vl),
+ INSN(cvttph2udq, , map5, 78, vl_2, fp16, vl),
+ INSN(cvttph2uqq, 66, map5, 78, vl_4, fp16, vl),
INSN(cvttph2uw, , map5, 7c, vl, fp16, vl),
INSN(cvttph2w, 66, map5, 7c, vl, fp16, vl),
+ INSN(cvttsh2si, f3, map5, 2c, el, fp16, el),
+ INSN(cvttsh2usi, f3, map5, 78, el, fp16, el),
+ INSN(cvtudq2ph, f2, map5, 7a, vl, d, vl),
+ INSN(cvtuqq2ph, f2, map5, 7a, vl, q, vl),
+ INSN(cvtusi2sh, f3, map5, 7b, el, dq64, el),
INSN(cvtuw2ph, f2, map5, 7d, vl, fp16, vl),
INSN(cvtw2ph, f3, map5, 7d, vl, fp16, vl),
INSN(divph, , map5, 5e, vl, fp16, vl),
--- a/tools/tests/x86_emulator/predicates.c
+++ b/tools/tests/x86_emulator/predicates.c
@@ -2033,6 +2033,9 @@ static const struct evex {
{ { 0x11 }, 2, T, W, pfx_f3, W0, LIG }, /* vmovsh */
{ { 0x1d }, 2, T, R, pfx_66, W0, Ln }, /* vcvtps2phx */
{ { 0x1d }, 2, T, R, pfx_no, W0, LIG }, /* vcvtss2sh */
+ { { 0x2a }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsi2sh */
+ { { 0x2c }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttsh2si */
+ { { 0x2d }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsh2si */
{ { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */
{ { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */
{ { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */
@@ -2045,6 +2048,10 @@ static const struct evex {
{ { 0x5a }, 2, T, R, pfx_66, W1, Ln }, /* vcvtpd2ph */
{ { 0x5a }, 2, T, R, pfx_f3, W0, LIG }, /* vcvtsh2sd */
{ { 0x5a }, 2, T, R, pfx_f2, W1, LIG }, /* vcvtsd2sh */
+ { { 0x5b }, 2, T, R, pfx_no, W0, Ln }, /* vcvtdq2ph */
+ { { 0x5b }, 2, T, R, pfx_no, W1, Ln }, /* vcvtqq2ph */
+ { { 0x5b }, 2, T, R, pfx_66, W0, Ln }, /* vcvtph2dq */
+ { { 0x5b }, 2, T, R, pfx_f3, W0, Ln }, /* vcvttph2dq */
{ { 0x5c }, 2, T, R, pfx_no, W0, Ln }, /* vsubph */
{ { 0x5c }, 2, T, R, pfx_f3, W0, LIG }, /* vsubsh */
{ { 0x5d }, 2, T, R, pfx_no, W0, Ln }, /* vminph */
@@ -2054,6 +2061,17 @@ static const struct evex {
{ { 0x5f }, 2, T, R, pfx_no, W0, Ln }, /* vmaxph */
{ { 0x5f }, 2, T, R, pfx_f3, W0, LIG }, /* vmaxsh */
{ { 0x6e }, 2, T, R, pfx_66, WIG, L0 }, /* vmovw */
+ { { 0x78 }, 2, T, R, pfx_no, W0, Ln }, /* vcvttph2udq */
+ { { 0x78 }, 2, T, R, pfx_66, W0, Ln }, /* vcvttph2uqq */
+ { { 0x78 }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttsh2usi */
+ { { 0x79 }, 2, T, R, pfx_no, W0, Ln }, /* vcvtph2udq */
+ { { 0x79 }, 2, T, R, pfx_66, W0, Ln }, /* vcvtph2uqq */
+ { { 0x79 }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsh2usi */
+ { { 0x7a }, 2, T, R, pfx_66, W0, Ln }, /* vcvttph2qq */
+ { { 0x7a }, 2, T, R, pfx_f2, W0, Ln }, /* vcvtudq2ph */
+ { { 0x7a }, 2, T, R, pfx_f2, W1, Ln }, /* vcvtuqq2ph */
+ { { 0x7b }, 2, T, R, pfx_66, W0, Ln }, /* vcvtph2qq */
+ { { 0x7b }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtusi2sh */
{ { 0x7c }, 2, T, R, pfx_no, W0, Ln }, /* vcvttph2uw */
{ { 0x7c }, 2, T, R, pfx_66, W0, Ln }, /* vcvttph2w */
{ { 0x7d }, 2, T, R, pfx_no, W0, Ln }, /* vcvtph2uw */
--- a/xen/arch/x86/x86_emulate/decode.c
+++ b/xen/arch/x86/x86_emulate/decode.c
@@ -1497,12 +1497,25 @@ int x86emul_decode(struct x86_emulate_st
s->simd_size = simd_scalar_vexw;
break;
+ case 0x2a: /* vcvtsi2sh */
+ break;
+
+ case 0x2c: case 0x2d: /* vcvt{,t}sh2si */
+ if ( s->evex.pfx == vex_f3 )
+ s->fp16 = true;
+ break;
+
case 0x2e: case 0x2f: /* v{,u}comish */
if ( !s->evex.pfx )
s->fp16 = true;
s->simd_size = simd_none;
break;
+ case 0x5b: /* vcvt{d,q}q2ph, vcvt{,t}ph2dq */
+ if ( s->evex.pfx && s->evex.pfx != vex_f2 )
+ s->fp16 = true;
+ break;
+
case 0x6e: /* vmovw r/m16, xmm */
d = (d & ~SrcMask) | SrcMem16;
/* fall through */
@@ -1512,6 +1525,17 @@ int x86emul_decode(struct x86_emulate_st
s->simd_size = simd_none;
break;
+ case 0x78: case 0x79: /* vcvt{,t}ph2u{d,q}q, vcvt{,t}sh2usi */
+ if ( s->evex.pfx != vex_f2 )
+ s->fp16 = true;
+ break;
+
+ case 0x7a: /* vcvttph2qq, vcvtu{d,q}q2ph */
+ case 0x7b: /* vcvtph2qq, vcvtusi2sh */
+ if ( s->evex.pfx == vex_66 )
+ s->fp16 = true;
+ break;
+
case 0x7c: /* vcvttph2{,u}w */
case 0x7d: /* vcvtph2{,u}w / vcvt{,u}w2ph */
d = DstReg | SrcMem | TwoOp;
@@ -1524,10 +1548,34 @@ int x86emul_decode(struct x86_emulate_st
switch ( b )
{
+ case 0x78:
+ case 0x79:
+ /* vcvt{,t}ph2u{d,q}q need special casing */
+ if ( s->evex.pfx <= vex_66 )
+ {
+ if ( !s->evex.brs )
+ disp8scale -= 1 + (s->evex.pfx == vex_66);
+ break;
+ }
+ /* vcvt{,t}sh2usi needs special casing: fall through */
+ case 0x2c: case 0x2d: /* vcvt{,t}sh2si need special casing */
+ disp8scale = 1;
+ break;
+
case 0x5a: /* vcvtph2pd needs special casing */
if ( !s->evex.pfx && !s->evex.brs )
disp8scale -= 2;
break;
+
+ case 0x5b: /* vcvt{,t}ph2dq need special casing */
+ if ( s->evex.pfx && !s->evex.brs )
+ --disp8scale;
+ break;
+
+ case 0x7a: case 0x7b: /* vcvt{,t}ph2qq need special casing */
+ if ( s->evex.pfx == vex_66 )
+ disp8scale = s->evex.brs ? 1 : 2 + s->evex.lr;
+ break;
}
break;
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -3577,6 +3577,12 @@ x86_emulate(
state->simd_size = simd_none;
goto simd_0f_rm;
+#ifndef X86EMUL_NO_SIMD
+
+ case X86EMUL_OPC_EVEX_F3(5, 0x2a): /* vcvtsi2sh r/m,xmm,xmm */
+ case X86EMUL_OPC_EVEX_F3(5, 0x7b): /* vcvtusi2sh r/m,xmm,xmm */
+ host_and_vcpu_must_have(avx512_fp16);
+ /* fall through */
CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */
CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x7b): /* vcvtusi2s{s,d} r/m,xmm,xmm */
generate_exception_if(evex.opmsk || (ea.type != OP_REG && evex.brs),
@@ -3655,7 +3661,9 @@ x86_emulate(
opc[1] = 0x01;
rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
- vex.pfx & VEX_PREFIX_DOUBLE_MASK ? 8 : 4, ctxt);
+ vex.pfx & VEX_PREFIX_DOUBLE_MASK
+ ? 8 : 2 << !state->fp16,
+ ctxt);
if ( rc != X86EMUL_OKAY )
goto done;
}
@@ -3685,6 +3693,12 @@ x86_emulate(
state->simd_size = simd_none;
break;
+ case X86EMUL_OPC_EVEX_F3(5, 0x2c): /* vcvttsh2si xmm/mem,reg */
+ case X86EMUL_OPC_EVEX_F3(5, 0x2d): /* vcvtsh2si xmm/mem,reg */
+ case X86EMUL_OPC_EVEX_F3(5, 0x78): /* vcvttsh2usi xmm/mem,reg */
+ case X86EMUL_OPC_EVEX_F3(5, 0x79): /* vcvtsh2usi xmm/mem,reg */
+ host_and_vcpu_must_have(avx512_fp16);
+ /* fall through */
CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */
@@ -3756,8 +3770,6 @@ x86_emulate(
ASSERT(!state->simd_size);
break;
-#ifndef X86EMUL_NO_SIMD
-
case X86EMUL_OPC_EVEX(5, 0x2e): /* vucomish xmm/m16,xmm */
case X86EMUL_OPC_EVEX(5, 0x2f): /* vcomish xmm/m16,xmm */
host_and_vcpu_must_have(avx512_fp16);
@@ -7787,6 +7799,38 @@ x86_emulate(
2 * evex.w);
goto avx512f_all_fp;
+ case X86EMUL_OPC_EVEX (5, 0x5b): /* vcvtdq2ph [xyz]mm/mem,[xy]mm{k} */
+ /* vcvtqq2ph [xyz]mm/mem,xmm{k} */
+ case X86EMUL_OPC_EVEX_F2(5, 0x7a): /* vcvtudq2ph [xyz]mm/mem,[xy]mm{k} */
+ /* vcvtuqq2ph [xyz]mm/mem,xmm{k} */
+ host_and_vcpu_must_have(avx512_fp16);
+ if ( ea.type != OP_REG || !evex.brs )
+ avx512_vlen_check(false);
+ op_bytes = 16 << evex.lr;
+ goto simd_zmm;
+
+ case X86EMUL_OPC_EVEX_66(5, 0x5b): /* vcvtph2dq [xy]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_F3(5, 0x5b): /* vcvttph2dq [xy]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX (5, 0x78): /* vcvttph2udq [xy]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX (5, 0x79): /* vcvtph2udq [xy]mm/mem,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w, EXC_UD);
+ if ( ea.type != OP_REG || !evex.brs )
+ avx512_vlen_check(false);
+ op_bytes = 8 << evex.lr;
+ goto simd_zmm;
+
+ case X86EMUL_OPC_EVEX_66(5, 0x78): /* vcvttph2uqq xmm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(5, 0x79): /* vcvtph2uqq xmm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(5, 0x7a): /* vcvttph2qq xmm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(5, 0x7b): /* vcvtph2qq xmm/mem,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512_fp16);
+ generate_exception_if(evex.w, EXC_UD);
+ if ( ea.type != OP_REG || !evex.brs )
+ avx512_vlen_check(false);
+ op_bytes = 4 << evex.lr;
+ goto simd_zmm;
+
case X86EMUL_OPC_EVEX (5, 0x7c): /* vcvttph2uw [xyz]mm/mem,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(5, 0x7c): /* vcvttph2w [xyz]mm/mem,[xyz]mm{k} */
case X86EMUL_OPC_EVEX (5, 0x7d): /* vcvtph2uw [xyz]mm/mem,[xyz]mm{k} */