AVX512 Half-precision floating point (FP16) has better performance compared to FP32 if the presicion or magnitude requirements are met. It's defined as CPUID.(EAX=7,ECX=0):EDX[bit 23].
Refer to https://software.intel.com/content/www/us/en/develop/download/\ intel-architecture-instruction-set-extensions-programming-reference.html Signed-off-by: Cathy Zhang <cathy.zh...@intel.com> --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index ca997a68cd..c4d623b8b9 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -979,7 +979,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "avx512-vp2intersect", NULL, "md-clear", NULL, NULL, NULL, "serialize", NULL, "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, - NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "avx512-fp16", NULL, NULL, "spec-ctrl", "stibp", NULL, "arch-capabilities", "core-capability", "ssbd", }, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c4a49c06a8..6fd675c654 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -784,6 +784,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EDX_SERIALIZE (1U << 14) /* TSX Suspend Load Address Tracking instruction */ #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) +/* AVX512_FP16 instruction */ +#define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) /* Speculation Control */ #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Single Thread Indirect Branch Predictors */ -- 2.17.1