On Wed, Jul 21, 2021 at 9:44 AM liuhongt <hongtao....@intel.com> wrote: > > From: "Guo, Xuepeng" <xuepeng....@intel.com> > > gcc/ChangeLog: > > * common/config/i386/cpuinfo.h (get_available_features): > Detect FEATURE_AVX512FP16. > * common/config/i386/i386-common.c > (OPTION_MASK_ISA_AVX512FP16_SET, > OPTION_MASK_ISA_AVX512FP16_UNSET, > OPTION_MASK_ISA2_AVX512FP16_SET, > OPTION_MASK_ISA2_AVX512FP16_UNSET): New. > (OPTION_MASK_ISA2_AVX512BW_UNSET, > OPTION_MASK_ISA2_AVX512BF16_UNSET): Add AVX512FP16. > (ix86_handle_option): Handle -mavx512fp16. > * common/config/i386/i386-cpuinfo.h (enum processor_features): > Add FEATURE_AVX512FP16. > * common/config/i386/i386-isas.h: Add entry for AVX512FP16. > * config.gcc: Add avx512fp16intrin.h. > * config/i386/avx512fp16intrin.h: New intrinsic header. > * config/i386/cpuid.h: Add bit_AVX512FP16. > * config/i386/i386-builtin-types.def: (FLOAT16): New primitive type. > * config/i386/i386-builtins.c: Support _Float16 type for i386 > backend. > (ix86_init_float16_builtins): New function. > (ix86_float16_type_node): New. > * config/i386/i386-c.c (ix86_target_macros_internal): Define > __AVX512FP16__. > * config/i386/i386-expand.c (ix86_expand_branch): Support > HFmode. > (ix86_prepare_fp_compare_args): Adjust TARGET_SSE_MATH && > SSE_FLOAT_MODE_P to SSE_FLOAT_MODE_SSEMATH_OR_HF_P. > (ix86_expand_fp_movcc): Ditto. > * config/i386/i386-isa.def: Add PTA define for AVX512FP16. > * config/i386/i386-options.c (isa2_opts): Add -mavx512fp16. > (ix86_valid_target_attribute_inner_p): Add avx512fp16 attribute. > * config/i386/i386.c (ix86_get_ssemov): Use > vmovdqu16/vmovw/vmovsh for HFmode/HImode scalar or vector. > (ix86_get_excess_precision): Use > FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 when TARGET_AVX512FP16 > existed. > (output_387_binary_op): Update instruction suffix for HFmode. > (sse_store_index): Use SFmode cost for HFmode cost. > (inline_memory_move_cost): Add HFmode, and perfer SSE cost over > GPR cost for HFmode. 
> (ix86_hard_regno_mode_ok): Allow HImode in sse register. > (ix86_mangle_type): Add manlging for _Float16 type. > (inline_secondary_memory_needed): No memory is needed for > 16bit movement between gpr and sse reg under > TARGET_AVX512FP16. > (ix86_multiplication_cost): Adjust TARGET_SSE_MATH && > SSE_FLOAT_MODE_P to SSE_FLOAT_MODE_SSEMATH_OR_HF_P. > (ix86_division_cost): Ditto. > (ix86_rtx_costs): Ditto. > (ix86_add_stmt_cost): Ditto. > (ix86_optab_supported_p): Ditto. > * config/i386/i386.h (VALID_AVX512F_SCALAR_MODE): Add HFmode. > (SSE_FLOAT_MODE_SSEMATH_OR_HF_P): Add HFmode. > (SSE_FLOAT_MODE_P): Add HFmode. > (PTA_SAPPHIRERAPIDS): Add PTA_AVX512FP16. > * config/i386/i386.md (mode): Add HFmode. > (MODE_SIZE): Add HFmode. > (MODEFH): Likewise. > (ssemodesuffix): Add sh suffix for HFmode. > (cbranch<mode>4): Use MODEFH. > (<insn><mode>3): Likewise. > (mul<mode>3): Likewise. > (div<mode>3): Likewise. > (*ieee_s<ieee_maxmin><mode>3): Likewise. > (*cmpi<unord>hf): New define_insn for HFmode. > (*movhf_internal): Adjust for avx512fp16 instruction. > (extendhf<mode>2): Likewise. > (trunc<mode>hf2): Likewise. > (*fop_hf_comm): Likewise. > (*fop_hf_1): Likewise. > (float<floatunssuffix><mode>hf2): Likewise. > (mov<mode>cc): Likewise. > * config/i386/i386.opt: Add mavx512fp16. > * config/i386/immintrin.h: Include avx512fp16intrin.h. > * doc/invoke.texi: Add mavx512fp16. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx-1.c: Add -mavx512fp16 in dg-options. > * gcc.target/i386/avx-2.c: Ditto. > * gcc.target/i386/avx512-check.h: Check cpuid for AVX512FP16. > * gcc.target/i386/funcspec-56.inc: Add new target attribute check. > * gcc.target/i386/sse-13.c: Add -mavx512fp16. > * gcc.target/i386/sse-14.c: Ditto. > * gcc.target/i386/sse-22.c: Ditto. > * gcc.target/i386/sse-23.c: Ditto. > * lib/target-supports.exp: (check_effective_target_avx512fp16): New. > * g++.target/i386/float16-1.C: New test. > * g++.target/i386/float16-2.C: Ditto. 
> * g++.target/i386/float16-3.C: Ditto. > * gcc.target/i386/avx512fp16-12a.c: Ditto. > * gcc.target/i386/avx512fp16-12b.c: Ditto. > * gcc.target/i386/float16-3a.c: Ditto. > * gcc.target/i386/float16-3b.c: Ditto. > * gcc.target/i386/float16-4a.c: Ditto. > * gcc.target/i386/float16-4b.c: Ditto. > * gcc.target/i386/pr54855-12.c: Ditto. > * g++.dg/other/i386-2.C: Ditto. > * g++.dg/other/i386-3.C: Ditto. > > Co-Authored-By: Guo, Xuepeng <xuepeng....@intel.com> > Co-Authored-By: H.J. Lu <hongjiu...@intel.com> > Co-Authored-By: Liu, Hongtao <hongtao....@intel.com> > Co-Authored-By: Wang, Hongyu <hongyu.w...@intel.com> > Co-Authored-By: Xu, Dianhong <dianhong...@intel.com> > --- > gcc/common/config/i386/cpuinfo.h | 2 + > gcc/common/config/i386/i386-common.c | 26 ++- > gcc/common/config/i386/i386-cpuinfo.h | 1 + > gcc/common/config/i386/i386-isas.h | 1 + > gcc/config.gcc | 2 +- > gcc/config/i386/avx512fp16intrin.h | 53 +++++ > gcc/config/i386/cpuid.h | 1 + > gcc/config/i386/i386-builtin-types.def | 1 + > gcc/config/i386/i386-builtins.c | 23 +++ > gcc/config/i386/i386-c.c | 2 + > gcc/config/i386/i386-expand.c | 5 +- > gcc/config/i386/i386-isa.def | 1 + > gcc/config/i386/i386-options.c | 4 +- > gcc/config/i386/i386.c | 128 ++++++++---- > gcc/config/i386/i386.h | 11 +- > gcc/config/i386/i386.md | 185 ++++++++++++++---- > gcc/config/i386/i386.opt | 4 + > gcc/config/i386/immintrin.h | 4 + > gcc/doc/invoke.texi | 10 +- > gcc/testsuite/g++.dg/other/i386-2.C | 2 +- > gcc/testsuite/g++.dg/other/i386-3.C | 2 +- > gcc/testsuite/g++.target/i386/float16-1.C | 8 + > gcc/testsuite/g++.target/i386/float16-2.C | 14 ++ > gcc/testsuite/g++.target/i386/float16-3.C | 10 + > gcc/testsuite/gcc.target/i386/avx-1.c | 2 +- > gcc/testsuite/gcc.target/i386/avx-2.c | 2 +- > gcc/testsuite/gcc.target/i386/avx512-check.h | 3 + > .../gcc.target/i386/avx512fp16-12a.c | 21 ++ > .../gcc.target/i386/avx512fp16-12b.c | 27 +++ > gcc/testsuite/gcc.target/i386/float16-3a.c | 10 + > 
gcc/testsuite/gcc.target/i386/float16-3b.c | 10 + > gcc/testsuite/gcc.target/i386/float16-4a.c | 10 + > gcc/testsuite/gcc.target/i386/float16-4b.c | 10 + > gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + > gcc/testsuite/gcc.target/i386/pr54855-12.c | 14 ++ > gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- > gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- > gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- > gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- > gcc/testsuite/lib/target-supports.exp | 13 +- > 40 files changed, 531 insertions(+), 103 deletions(-) > create mode 100644 gcc/config/i386/avx512fp16intrin.h > create mode 100644 gcc/testsuite/g++.target/i386/float16-1.C > create mode 100644 gcc/testsuite/g++.target/i386/float16-2.C > create mode 100644 gcc/testsuite/g++.target/i386/float16-3.C > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-12a.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-12b.c > create mode 100644 gcc/testsuite/gcc.target/i386/float16-3a.c > create mode 100644 gcc/testsuite/gcc.target/i386/float16-3b.c > create mode 100644 gcc/testsuite/gcc.target/i386/float16-4a.c > create mode 100644 gcc/testsuite/gcc.target/i386/float16-4b.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-12.c > > diff --git a/gcc/common/config/i386/cpuinfo.h > b/gcc/common/config/i386/cpuinfo.h > index 458f41de776..1835ac64e67 100644 > --- a/gcc/common/config/i386/cpuinfo.h > +++ b/gcc/common/config/i386/cpuinfo.h > @@ -731,6 +731,8 @@ get_available_features (struct __processor_model > *cpu_model, > set_feature (FEATURE_AVX5124FMAPS); > if (edx & bit_AVX512VP2INTERSECT) > set_feature (FEATURE_AVX512VP2INTERSECT); > + if (edx & bit_AVX512FP16) > + set_feature (FEATURE_AVX512FP16); > } > > __cpuid_count (7, 1, eax, ebx, ecx, edx); > diff --git a/gcc/common/config/i386/i386-common.c > b/gcc/common/config/i386/i386-common.c > index 76ab1a14e54..00c65ba15ab 100644 > --- a/gcc/common/config/i386/i386-common.c > +++ 
b/gcc/common/config/i386/i386-common.c > @@ -82,6 +82,8 @@ along with GCC; see the file COPYING3. If not see > #define OPTION_MASK_ISA2_AVX5124VNNIW_SET OPTION_MASK_ISA2_AVX5124VNNIW > #define OPTION_MASK_ISA_AVX512VBMI2_SET \ > (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512F_SET) > +#define OPTION_MASK_ISA_AVX512FP16_SET OPTION_MASK_ISA_AVX512BW_SET > +#define OPTION_MASK_ISA2_AVX512FP16_SET OPTION_MASK_ISA2_AVX512FP16 > #define OPTION_MASK_ISA_AVX512VNNI_SET \ > (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET) > #define OPTION_MASK_ISA2_AVXVNNI_SET OPTION_MASK_ISA2_AVXVNNI > @@ -231,6 +233,8 @@ along with GCC; see the file COPYING3. If not see > #define OPTION_MASK_ISA2_AVX5124FMAPS_UNSET OPTION_MASK_ISA2_AVX5124FMAPS > #define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW > #define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2 > +#define OPTION_MASK_ISA_AVX512FP16_UNSET OPTION_MASK_ISA_AVX512BW_UNSET > +#define OPTION_MASK_ISA2_AVX512FP16_UNSET OPTION_MASK_ISA2_AVX512FP16 > #define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI > #define OPTION_MASK_ISA2_AVXVNNI_UNSET OPTION_MASK_ISA2_AVXVNNI > #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ > @@ -313,7 +317,8 @@ along with GCC; see the file COPYING3. If not see > (OPTION_MASK_ISA2_AVX512BF16_UNSET \ > | OPTION_MASK_ISA2_AVX5124FMAPS_UNSET \ > | OPTION_MASK_ISA2_AVX5124VNNIW_UNSET \ > - | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET) > + | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET \ > + | OPTION_MASK_ISA2_AVX512FP16_UNSET) > #define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \ > (OPTION_MASK_ISA2_AVX512F_UNSET) > #define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET > @@ -326,7 +331,9 @@ along with GCC; see the file COPYING3. 
If not see > (OPTION_MASK_ISA2_SSE3_UNSET | OPTION_MASK_ISA2_KL_UNSET) > #define OPTION_MASK_ISA2_SSE_UNSET OPTION_MASK_ISA2_SSE2_UNSET > > -#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA2_AVX512BF16_UNSET > +#define OPTION_MASK_ISA2_AVX512BW_UNSET \ > + (OPTION_MASK_ISA2_AVX512BF16_UNSET \ > + | OPTION_MASK_ISA2_AVX512FP16_UNSET) > > /* Set 1 << value as value of -malign-FLAG option. */ > > @@ -853,6 +860,21 @@ ix86_handle_option (struct gcc_options *opts, > } > return true; > > + case OPT_mavx512fp16: > + if (value) > + { > + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX512FP16_SET; > + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512FP16_SET; > + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512FP16_SET; > + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512FP16_SET; > + } > + else > + { > + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512FP16_UNSET; > + opts->x_ix86_isa_flags2_explicit |= > OPTION_MASK_ISA2_AVX512FP16_UNSET; > + } > + return true; > + > case OPT_mavx512vnni: > if (value) > { > diff --git a/gcc/common/config/i386/i386-cpuinfo.h > b/gcc/common/config/i386/i386-cpuinfo.h > index e68dd656046..4e0659fc7b2 100644 > --- a/gcc/common/config/i386/i386-cpuinfo.h > +++ b/gcc/common/config/i386/i386-cpuinfo.h > @@ -228,6 +228,7 @@ enum processor_features > FEATURE_AESKLE, > FEATURE_WIDEKL, > FEATURE_AVXVNNI, > + FEATURE_AVX512FP16, > CPU_FEATURE_MAX > }; > > diff --git a/gcc/common/config/i386/i386-isas.h > b/gcc/common/config/i386/i386-isas.h > index 898c18f3dda..a6783660278 100644 > --- a/gcc/common/config/i386/i386-isas.h > +++ b/gcc/common/config/i386/i386-isas.h > @@ -169,4 +169,5 @@ ISA_NAMES_TABLE_START > ISA_NAMES_TABLE_ENTRY("aeskle", FEATURE_AESKLE, P_NONE, NULL) > ISA_NAMES_TABLE_ENTRY("widekl", FEATURE_WIDEKL, P_NONE, "-mwidekl") > ISA_NAMES_TABLE_ENTRY("avxvnni", FEATURE_AVXVNNI, P_NONE, "-mavxvnni") > + ISA_NAMES_TABLE_ENTRY("avx512fp16", FEATURE_AVX512FP16, P_NONE, > "-mavx512fp16") > ISA_NAMES_TABLE_END > diff 
--git a/gcc/config.gcc b/gcc/config.gcc > index 3df9b52cf25..a354351408c 100644 > --- a/gcc/config.gcc > +++ b/gcc/config.gcc > @@ -416,7 +416,7 @@ i[34567]86-*-* | x86_64-*-*) > tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h > amxbf16intrin.h x86gprintrin.h uintrintrin.h > hresetintrin.h keylockerintrin.h avxvnniintrin.h > - mwaitintrin.h" > + mwaitintrin.h avx512fp16intrin.h" > ;; > ia64-*-*) > extra_headers=ia64intrin.h > diff --git a/gcc/config/i386/avx512fp16intrin.h > b/gcc/config/i386/avx512fp16intrin.h > new file mode 100644 > index 00000000000..38d63161ba6 > --- /dev/null > +++ b/gcc/config/i386/avx512fp16intrin.h > @@ -0,0 +1,53 @@ > +/* Copyright (C) 2019 Free Software Foundation, Inc. > + > + This file is part of GCC. > + > + GCC is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3, or (at your option) > + any later version. > + > + GCC is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + Under Section 7 of GPL version 3, you are granted additional > + permissions described in the GCC Runtime Library Exception, version > + 3.1, as published by the Free Software Foundation. > + > + You should have received a copy of the GNU General Public License and > + a copy of the GCC Runtime Library Exception along with this program; > + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _IMMINTRIN_H_INCLUDED > +#error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> > instead." 
> +#endif > + > +#ifndef __AVX512FP16INTRIN_H_INCLUDED > +#define __AVX512FP16INTRIN_H_INCLUDED > + > +#ifndef __AVX512FP16__ > +#pragma GCC push_options > +#pragma GCC target("avx512fp16") > +#define __DISABLE_AVX512FP16__ > +#endif /* __AVX512FP16__ */ > + > +/* Internal data types for implementing the intrinsics. */ > +typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16))); > +typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32))); > +typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64))); > + > +/* The Intel API is flexible enough that we must allow aliasing with other > + vector types, and their scalar components. */ > +typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), > __may_alias__)); > +typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), > __may_alias__)); > +typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), > __may_alias__)); > + > +#ifdef __DISABLE_AVX512FP16__ > +#undef __DISABLE_AVX512FP16__ > +#pragma GCC pop_options > +#endif /* __DISABLE_AVX512FP16__ */ > + > +#endif /* __AVX512FP16INTRIN_H_INCLUDED */ > diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h > index aebc17c6827..82b8050028b 100644 > --- a/gcc/config/i386/cpuid.h > +++ b/gcc/config/i386/cpuid.h > @@ -126,6 +126,7 @@ > #define bit_AVX5124VNNIW (1 << 2) > #define bit_AVX5124FMAPS (1 << 3) > #define bit_AVX512VP2INTERSECT (1 << 8) > +#define bit_AVX512FP16 (1 << 23) > #define bit_IBT (1 << 20) > #define bit_UINTR (1 << 5) > #define bit_PCONFIG (1 << 18) > diff --git a/gcc/config/i386/i386-builtin-types.def > b/gcc/config/i386/i386-builtin-types.def > index 3ca313c19ec..1768b88d748 100644 > --- a/gcc/config/i386/i386-builtin-types.def > +++ b/gcc/config/i386/i386-builtin-types.def > @@ -68,6 +68,7 @@ DEF_PRIMITIVE_TYPE (UINT8, unsigned_char_type_node) > DEF_PRIMITIVE_TYPE (UINT16, short_unsigned_type_node) > DEF_PRIMITIVE_TYPE (INT64, long_long_integer_type_node) > DEF_PRIMITIVE_TYPE (UINT64, 
long_long_unsigned_type_node) > +DEF_PRIMITIVE_TYPE (FLOAT16, ix86_float16_type_node) > DEF_PRIMITIVE_TYPE (FLOAT, float_type_node) > DEF_PRIMITIVE_TYPE (DOUBLE, double_type_node) > DEF_PRIMITIVE_TYPE (FLOAT80, float80_type_node) > diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c > index 204e2903126..668f09f12a0 100644 > --- a/gcc/config/i386/i386-builtins.c > +++ b/gcc/config/i386/i386-builtins.c > @@ -125,6 +125,7 @@ BDESC_VERIFYS (IX86_BUILTIN_MAX, > /* Table for the ix86 builtin non-function types. */ > static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1]; > > +tree ix86_float16_type_node = NULL_TREE; > /* Retrieve an element from the above table, building some of > the types lazily. */ > > @@ -1343,6 +1344,26 @@ ix86_init_builtins_va_builtins_abi (void) > BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv); > } > > +static void > +ix86_init_float16_builtins (void) > +{ > + /* Provide the _Float16 type and float16_type_node if needed so that > + it can be used in AVX512FP16 intrinsics and builtins. */ > + if (!float16_type_node) > + { > + ix86_float16_type_node = make_node (REAL_TYPE); > + TYPE_PRECISION (ix86_float16_type_node) = 16; > + SET_TYPE_MODE (ix86_float16_type_node, HFmode); > + layout_type (ix86_float16_type_node); > + } > + else > + ix86_float16_type_node = float16_type_node; > + > + if (!maybe_get_identifier ("_Float16") && TARGET_SSE2) > + lang_hooks.types.register_builtin_type (ix86_float16_type_node, > + "_Float16"); > +} > + > static void > ix86_init_builtin_types (void) > { > @@ -1371,6 +1392,8 @@ ix86_init_builtin_types (void) > it. 
*/ > lang_hooks.types.register_builtin_type (float128_type_node, "__float128"); > > + ix86_init_float16_builtins (); > + > const_string_type_node > = build_pointer_type (build_qualified_type > (char_type_node, TYPE_QUAL_CONST)); > diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c > index 5ed0de006fb..cc64f855ecc 100644 > --- a/gcc/config/i386/i386-c.c > +++ b/gcc/config/i386/i386-c.c > @@ -598,6 +598,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, > def_or_undef (parse_in, "__PTWRITE__"); > if (isa_flag2 & OPTION_MASK_ISA2_AVX512BF16) > def_or_undef (parse_in, "__AVX512BF16__"); > + if (isa_flag2 & OPTION_MASK_ISA2_AVX512FP16) > + def_or_undef (parse_in, "__AVX512FP16__"); > if (TARGET_MMX_WITH_SSE) > def_or_undef (parse_in, "__MMX_WITH_SSE__"); > if (isa_flag2 & OPTION_MASK_ISA2_ENQCMD) > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index 69ea79e6123..b7d050a1e42 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -2314,6 +2314,7 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx > op1, rtx label) > > switch (mode) > { > + case E_HFmode: > case E_SFmode: > case E_DFmode: > case E_XFmode: > @@ -2627,7 +2628,7 @@ ix86_prepare_fp_compare_args (enum rtx_code code, rtx > *pop0, rtx *pop1) > bool unordered_compare = ix86_unordered_fp_compare (code); > rtx op0 = *pop0, op1 = *pop1; > machine_mode op_mode = GET_MODE (op0); > - bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); > + bool is_sse = SSE_FLOAT_MODE_SSEMATH_OR_HF_P (op_mode); > > /* All of the unordered compare instructions only work on registers. > The same is true of the fcomi compare instructions. 
The XFmode > @@ -4112,7 +4113,7 @@ ix86_expand_fp_movcc (rtx operands[]) > rtx op0 = XEXP (operands[1], 0); > rtx op1 = XEXP (operands[1], 1); > > - if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > { > machine_mode cmode; > > diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def > index a0d46cbc892..83d9302ea3d 100644 > --- a/gcc/config/i386/i386-isa.def > +++ b/gcc/config/i386/i386-isa.def > @@ -108,3 +108,4 @@ DEF_PTA(HRESET) > DEF_PTA(KL) > DEF_PTA(WIDEKL) > DEF_PTA(AVXVNNI) > +DEF_PTA(AVX512FP16) > diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c > index 3416a4f1752..df191763e4b 100644 > --- a/gcc/config/i386/i386-options.c > +++ b/gcc/config/i386/i386-options.c > @@ -223,7 +223,8 @@ static struct ix86_target_opts isa2_opts[] = > { "-mhreset", OPTION_MASK_ISA2_HRESET }, > { "-mkl", OPTION_MASK_ISA2_KL }, > { "-mwidekl", OPTION_MASK_ISA2_WIDEKL }, > - { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI } > + { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI }, > + { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 } > }; > static struct ix86_target_opts isa_opts[] = > { > @@ -1045,6 +1046,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree > args, char *p_strings[], > IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16), > IX86_ATTR_ISA ("hreset", OPT_mhreset), > IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni), > + IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16), > > /* enum options */ > IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 02628d838fc..e826484a4f4 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -5497,6 +5497,14 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands) > case MODE_SI: > return "%vmovd\t{%1, %0|%0, %1}"; > > + case MODE_HI: > + if (GENERAL_REG_P (operands[0])) > + return "vmovw\t{%1, %k0|%k0, %1}"; > + else if (GENERAL_REG_P (operands[1])) > + return "vmovw\t{%k1, %0|%0, %k1}"; > + else > + 
return "vmovw\t{%1, %0|%0, %1}"; > + > case MODE_DF: > if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) > return "vmovsd\t{%d1, %0|%0, %d1}"; > @@ -5509,6 +5517,12 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands) > else > return "%vmovss\t{%1, %0|%0, %1}"; > > + case MODE_HF: > + if (REG_P (operands[0]) && REG_P (operands[1])) > + return "vmovsh\t{%d1, %0|%0, %d1}"; > + else > + return "vmovsh\t{%1, %0|%0, %1}"; > + > case MODE_V1DF: > gcc_assert (!TARGET_AVX); > return "movlpd\t{%1, %0|%0, %1}"; > @@ -13955,7 +13969,9 @@ output_387_binary_op (rtx_insn *insn, rtx *operands) > > if (is_sse) > { > - p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd"; > + p = (GET_MODE (operands[0]) == HFmode > + ? "sh" > + : (GET_MODE (operands[0]) == SFmode ? "ss" : "sd")); > strcat (buf, p); > > if (TARGET_AVX) > @@ -19132,9 +19148,11 @@ inline_secondary_memory_needed (machine_mode mode, > reg_class_t class1, > if (!TARGET_SSE2) > return true; > > - /* Between SSE and general, we have moves no larger than word size. */ > + /* Between SSE and general, we have moves no larger than word size > + except for AVX512FP16, VMOVW enable 16bits movement. */ > if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)) > - || GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode) > + || GET_MODE_SIZE (mode) < GET_MODE_SIZE (TARGET_AVX512FP16 > + ? HImode : SImode) > || GET_MODE_SIZE (mode) > UNITS_PER_WORD) > return true;
Please recode the above to something like: if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))) return true; int msize = GET_MODE_SIZE (mode); /* Between SSE and general, we have moves no larger than word size. */ if (msize > UNITS_PER_WORD) return true; /* In addition to SImode moves, AVX512FP16 also enables HImode moves. */ int minsize = GET_MODE_SIZE (TARGET_AVX512FP16 ? HImode : SImode); if (msize < minsize) return true; > @@ -19229,21 +19247,26 @@ ix86_can_change_mode_class (machine_mode from, > machine_mode to, > static inline int > sse_store_index (machine_mode mode) > { > - switch (GET_MODE_SIZE (mode)) > - { > - case 4: > - return 0; > - case 8: > - return 1; > - case 16: > - return 2; > - case 32: > - return 3; > - case 64: > - return 4; > - default: > - return -1; > - } > + /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store > + costs to processor_costs, which requires changes to all entries in > + processor cost table. */ > + if (mode == E_HFmode) > + mode = E_SFmode; > + switch (GET_MODE_SIZE (mode)) > + { > + case 4: > + return 0; > + case 8: > + return 1; > + case 16: > + return 2; > + case 32: > + return 3; > + case 64: > + return 4; > + default: > + return -1; > + } > } > > /* Return the cost of moving data of mode M between a > @@ -19270,6 +19293,7 @@ inline_memory_move_cost (machine_mode mode, enum > reg_class regclass, int in) > int index; > switch (mode) > { > + case E_HFmode: > case E_SFmode: > index = 0; > break; > @@ -19370,11 +19394,31 @@ inline_memory_move_cost (machine_mode mode, enum > reg_class regclass, int in) > } > break; > case 2: > - if (in == 2) > - return MAX (ix86_cost->hard_register.int_load[1], > - ix86_cost->hard_register.int_store[1]); > - return in ? ix86_cost->hard_register.int_load[1] > - : ix86_cost->hard_register.int_store[1]; > + { > + int cost; > + if (in == 2) > + cost = MAX (ix86_cost->hard_register.int_load[1], > + ix86_cost->hard_register.int_store[1]); > + else > + cost = in ? 
ix86_cost->hard_register.int_load[1] > + : ix86_cost->hard_register.int_store[1]; > + if (mode == E_HFmode) > + { > + /* Prefer SSE over GPR for HFmode. */ > + int sse_cost; > + int index = sse_store_index (mode); > + if (in == 2) > + sse_cost = MAX (ix86_cost->hard_register.sse_load[index], > + ix86_cost->hard_register.sse_store[index]); > + else > + sse_cost = (in > + ? ix86_cost->hard_register.sse_load [index] > + : ix86_cost->hard_register.sse_store [index]); > + if (sse_cost >= cost) > + cost = sse_cost + 1; > + } > + return cost; > + } > default: > if (in == 2) > cost = MAX (ix86_cost->hard_register.int_load[2], > @@ -19548,6 +19592,8 @@ ix86_hard_regno_mode_ok (unsigned int regno, > machine_mode mode) > - XI mode > - any of 512-bit wide vector mode > - any scalar mode. */ > + /* For AVX512FP16, vmovw supports movement of HImode > + between gpr and sse registser. */ > if (TARGET_AVX512F > && (mode == XImode > || VALID_AVX512F_REG_MODE (mode) > @@ -19833,7 +19879,7 @@ ix86_multiplication_cost (const struct > processor_costs *cost, > if (VECTOR_MODE_P (mode)) > inner_mode = GET_MODE_INNER (mode); > > - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > return inner_mode == DFmode ? cost->mulsd : cost->mulss; > else if (X87_FLOAT_MODE_P (mode)) > return cost->fmul; > @@ -19885,7 +19931,7 @@ ix86_division_cost (const struct processor_costs > *cost, > if (VECTOR_MODE_P (mode)) > inner_mode = GET_MODE_INNER (mode); > > - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > return inner_mode == DFmode ? 
cost->divsd : cost->divss; > else if (X87_FLOAT_MODE_P (mode)) > return cost->fdiv; > @@ -20305,7 +20351,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int > outer_code_i, int opno, > return true; > } > > - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > { > *total = cost->addss; > return false; > @@ -20338,7 +20384,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int > outer_code_i, int opno, > /* FALLTHRU */ > > case NEG: > - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > { > *total = cost->sse_op; > return false; > @@ -20420,14 +20466,14 @@ ix86_rtx_costs (rtx x, machine_mode mode, int > outer_code_i, int opno, > return false; > > case FLOAT_EXTEND: > - if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) > + if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > *total = 0; > else > *total = ix86_vec_cost (mode, cost->addss); > return false; > > case FLOAT_TRUNCATE: > - if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) > + if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > *total = cost->fadd; > else > *total = ix86_vec_cost (mode, cost->addss); > @@ -20437,7 +20483,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int > outer_code_i, int opno, > /* SSE requires memory load for the constant operand. It may make > sense to account for this. Of course the constant operand may or > may not be reused. */ > - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > *total = cost->sse_op; > else if (X87_FLOAT_MODE_P (mode)) > *total = cost->fabs; > @@ -20446,7 +20492,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int > outer_code_i, int opno, > return false; > > case SQRT: > - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > *total = mode == SFmode ? 
cost->sqrtss : cost->sqrtsd; > else if (X87_FLOAT_MODE_P (mode)) > *total = cost->fsqrt; > @@ -21930,6 +21976,10 @@ ix86_mangle_type (const_tree type) > > switch (TYPE_MODE (type)) > { > + case E_HFmode: > + /* _Float16 is "DF16_". > + Align with clang's decision in https://reviews.llvm.org/D33719. */ > + return "DF16_"; > case E_TFmode: > /* __float128 is "g". */ > return "g"; > @@ -22553,7 +22603,7 @@ ix86_add_stmt_cost (class vec_info *vinfo, void > *data, int count, > case MINUS_EXPR: > if (kind == scalar_stmt) > { > - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > stmt_cost = ix86_cost->addss; > else if (X87_FLOAT_MODE_P (mode)) > stmt_cost = ix86_cost->fadd; > @@ -22571,7 +22621,7 @@ ix86_add_stmt_cost (class vec_info *vinfo, void > *data, int count, > stmt_cost = ix86_multiplication_cost (ix86_cost, mode); > break; > case NEGATE_EXPR: > - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > stmt_cost = ix86_cost->sse_op; > else if (X87_FLOAT_MODE_P (mode)) > stmt_cost = ix86_cost->fchs; > @@ -22627,7 +22677,7 @@ ix86_add_stmt_cost (class vec_info *vinfo, void > *data, int count, > case BIT_XOR_EXPR: > case BIT_AND_EXPR: > case BIT_NOT_EXPR: > - if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode)) > stmt_cost = ix86_cost->sse_op; > else if (VECTOR_MODE_P (mode)) > stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); > @@ -23233,8 +23283,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, > machine_mode, > return opt_type == OPTIMIZE_FOR_SPEED; > > case rint_optab: > - if (SSE_FLOAT_MODE_P (mode1) > - && TARGET_SSE_MATH > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode1) > && !flag_trapping_math > && !TARGET_SSE4_1) The above change is wrong. The condition is enabled for !TARGET_SSE4_1, so it never triggers for TARGET_AVX512FP16. 
> return opt_type == OPTIMIZE_FOR_SPEED; > @@ -23243,8 +23292,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, > machine_mode, > case floor_optab: > case ceil_optab: > case btrunc_optab: > - if (SSE_FLOAT_MODE_P (mode1) > - && TARGET_SSE_MATH > + if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode1) > && !flag_trapping_math > && TARGET_SSE4_1) > return true; > @@ -23329,7 +23377,9 @@ ix86_get_excess_precision (enum excess_precision_type > type) > /* The fastest type to promote to will always be the native type, > whether that occurs with implicit excess precision or > otherwise. */ > - return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT; > + return TARGET_AVX512FP16 > + ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 > + : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT; > case EXCESS_PRECISION_TYPE_STANDARD: > case EXCESS_PRECISION_TYPE_IMPLICIT: > /* Otherwise, the excess precision we want when we are > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index e21922e8782..dca2ad32ed4 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -1000,7 +1000,8 @@ extern const char *host_detect_local_cpu (int argc, > const char **argv); > > #define VALID_AVX512F_SCALAR_MODE(MODE) > \ > ((MODE) == DImode || (MODE) == DFmode || (MODE) == SImode \ > - || (MODE) == SFmode) > + || (MODE) == SFmode \ > + || (((MODE) == HImode || (MODE) == HFmode) && TARGET_AVX512FP16)) Please put TARGET_... in front of the condition. 
> #define VALID_AVX512F_REG_MODE(MODE) \ > ((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \ > @@ -1039,7 +1040,7 @@ extern const char *host_detect_local_cpu (int argc, > const char **argv); > > #define VALID_FP_MODE_P(MODE) \ > ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \ > - || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) > \ > + || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) > > #define VALID_INT_MODE_P(MODE) \ > ((MODE) == QImode || (MODE) == HImode > \ > @@ -1071,6 +1072,10 @@ extern const char *host_detect_local_cpu (int argc, > const char **argv); > #define SSE_FLOAT_MODE_P(MODE) \ > ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode)) > > +#define SSE_FLOAT_MODE_SSEMATH_OR_HF_P(MODE) \ > + ((SSE_FLOAT_MODE_P (MODE) && TARGET_SSE_MATH) > \ > + || (TARGET_AVX512FP16 && (MODE) == HFmode)) > + > #define FMA4_VEC_FLOAT_MODE_P(MODE) \ > (TARGET_FMA4 && ((MODE) == V4SFmode || (MODE) == V2DFmode \ > || (MODE) == V8SFmode || (MODE) == V4DFmode)) > @@ -2264,7 +2269,7 @@ constexpr wide_int_bitmask PTA_TIGERLAKE = > PTA_ICELAKE_CLIENT | PTA_MOVDIRI > constexpr wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI > | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE > | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE > - | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI; > + | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16; > constexpr wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF > | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1; > constexpr wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index dd991c3ffdf..8f11cbcf28b 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -496,7 +496,7 @@ (define_attr "type" > > ;; Main data type used by the insn > (define_attr "mode" > - 
"unknown,none,QI,HI,SI,DI,TI,OI,XI,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF, > + "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF, > V2DF,V2SF,V1DF,V8DF" > (const_string "unknown")) > > @@ -832,8 +832,7 @@ (define_attr "isa" > "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx, > sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, > avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, > avx512bw,noavx512bw,avx512dq,noavx512dq, > - avx512vl,noavx512vl, > - avxvnni,avx512vnnivl" > + avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16" > (const_string "base")) > > ;; Define instruction set of MMX instructions > @@ -885,7 +884,8 @@ (define_attr "enabled" "" > (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI") > (eq_attr "isa" "avx512vnnivl") > (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL") > - > + (eq_attr "isa" "avx512fp16") > + (symbol_ref "TARGET_AVX512FP16") Space here between "isa" and "mmx_isa" attribute processing. > (eq_attr "mmx_isa" "native") > (symbol_ref "!TARGET_MMX_WITH_SSE") > (eq_attr "mmx_isa" "sse") > @@ -1089,8 +1089,9 @@ (define_mode_iterator SWI48DWI [SI DI (TI > "TARGET_64BIT")]) > ;; compile time constant, it is faster to use <MODE_SIZE> than > ;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on > ;; command line options just use GET_MODE_SIZE macro. 
> -(define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8") (TI "16") > - (SF "4") (DF "8") (XF "GET_MODE_SIZE (XFmode)") > +(define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8") > + (TI "16") (HF "2") (SF "4") (DF "8") > + (XF "GET_MODE_SIZE (XFmode)") > (V16QI "16") (V32QI "32") (V64QI "64") > (V8HI "16") (V16HI "32") (V32HI "64") > (V4SI "16") (V8SI "32") (V16SI "64") > @@ -1222,8 +1223,11 @@ (define_mode_iterator MODEF [SF DF]) > ;; All x87 floating point modes > (define_mode_iterator X87MODEF [SF DF XF]) > > -;; All x87 floating point modes plus HF > -(define_mode_iterator X87MODEFH [SF DF XF HF]) > +;; SSE and x87 SFmode and DFmode floating point modes plus HFmode > +(define_mode_iterator MODEFH [(HF "TARGET_AVX512FP16") SF DF]) > + > +;; All x87 floating point modes plus HFmode > +(define_mode_iterator X87MODEFH [HF SF DF XF]) A general remark: Please avoid macroization of HFmode patterns for now. MODEF macro is used for cases where modes are shared between x87 and SSE, so the patterns have: TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH). Looking at the macroization gain, it looks to me that we get nothing but complications with the conditional MODEFH iterator. So, please remove all HFmode macroization (including mode attributes) and simply add a couple of expanders, protected with TARGET_AVX512FP16 insn constraint. We can macroize the newly added patterns with the existing ones in the future, but please not now. Uros. 
> ;; All SSE floating point modes > (define_mode_iterator SSEMODEF [SF DF TF]) > @@ -1231,7 +1235,7 @@ (define_mode_attr ssevecmodef [(SF "V4SF") (DF "V2DF") > (TF "TF")]) > > ;; SSE instruction suffix for various modes > (define_mode_attr ssemodesuffix > - [(SF "ss") (DF "sd") > + [(HF "sh") (SF "ss") (DF "sd") > (V16SF "ps") (V8DF "pd") > (V8SF "ps") (V4DF "pd") > (V4SF "ps") (V2DF "pd") > @@ -1498,15 +1502,15 @@ (define_expand "cstorexf4" > > (define_expand "cbranch<mode>4" > [(set (reg:CC FLAGS_REG) > - (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand") > - (match_operand:MODEF 2 "cmp_fp_expander_operand"))) > + (compare:CC (match_operand:MODEFH 1 "cmp_fp_expander_operand") > + (match_operand:MODEFH 2 "cmp_fp_expander_operand"))) > (set (pc) (if_then_else > (match_operator 0 "ix86_fp_comparison_operator" > [(reg:CC FLAGS_REG) > (const_int 0)]) > (label_ref (match_operand 3)) > (pc)))] > - "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" > + "TARGET_80387 || SSE_FLOAT_MODE_SSEMATH_OR_HF_P (<MODE>mode)" > { > ix86_expand_branch (GET_CODE (operands[0]), > operands[1], operands[2], operands[3]); > @@ -1705,6 +1709,17 @@ (define_insn "*cmpi<unord><MODEF:mode>" > (eq_attr "alternative" "0") > (symbol_ref "true") > (symbol_ref "false"))))]) > + > +(define_insn "*cmpi<unord>hf" > + [(set (reg:CCFP FLAGS_REG) > + (compare:CCFP > + (match_operand:HF 0 "register_operand" "v") > + (match_operand:HF 1 "nonimmediate_operand" "vm")))] > + "TARGET_AVX512FP16" > + "v<unord>comish\t{%1, %0|%0, %1}" > + [(set_attr "type" "ssecomi") > + (set_attr "prefix" "evex") > + (set_attr "mode" "HF")]) > > ;; Push/pop instructions. 
> > @@ -2436,8 +2451,8 @@ (define_insn "*movsi_internal" > (symbol_ref "true")))]) > > (define_insn "*movhi_internal" > - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k > ,*r,*m,*k") > - (match_operand:HI 1 "general_operand" "r > ,rn,rm,rn,*r,*km,*k,*k,CBC"))] > + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k > ,*r,*m,*k,?r,?v,*v,*v,*m") > + (match_operand:HI 1 "general_operand" "r > ,rn,rm,rn,*r,*km,*k,*k,CBC,v, r, v, m, v"))] > "!(MEM_P (operands[0]) && MEM_P (operands[1])) > && ix86_hardreg_mov_ok (operands[0], operands[1])" > > @@ -2463,6 +2478,9 @@ (define_insn "*movhi_internal" > gcc_unreachable (); > } > > + case TYPE_SSEMOV: > + return ix86_output_ssemov (insn, operands); > + > case TYPE_MSKLOG: > if (operands[1] == const0_rtx) > return "kxorw\t%0, %0, %0"; > @@ -2478,7 +2496,9 @@ (define_insn "*movhi_internal" > } > } > [(set (attr "type") > - (cond [(eq_attr "alternative" "4,5,6,7") > + (cond [(eq_attr "alternative" "9,10,11,12,13") > + (const_string "ssemov") > + (eq_attr "alternative" "4,5,6,7") > (const_string "mskmov") > (eq_attr "alternative" "8") > (const_string "msklog") > @@ -2503,6 +2523,8 @@ (define_insn "*movhi_internal" > (set (attr "mode") > (cond [(eq_attr "type" "imovx") > (const_string "SI") > + (eq_attr "alternative" "11") > + (const_string "HF") > (and (eq_attr "alternative" "1,2") > (match_operand:HI 1 "aligned_operand")) > (const_string "SI") > @@ -2511,7 +2533,12 @@ (define_insn "*movhi_internal" > (not (match_test "TARGET_HIMODE_MATH")))) > (const_string "SI") > ] > - (const_string "HI")))]) > + (const_string "HI"))) > + (set (attr "isa") > + (cond [(eq_attr "alternative" "9,10,11,12,13") > + (const_string "avx512fp16") > + ] > + (const_string "*")))]) Attribute ISA should be the first in attribute section, see many examples. > ;; Situation is quite tricky about when to choose full sized (SImode) move > ;; over QImode moves. 
For Q_REG -> Q_REG move we use full size only for > @@ -3727,7 +3754,10 @@ (define_insn "*movhf_internal" > (eq_attr "alternative" "2") > (const_string "sselog1") > (eq_attr "alternative" "4,5,6,7") > - (const_string "sselog") > + (if_then_else > + (match_test ("TARGET_AVX512FP16")) > + (const_string "ssemov") > + (const_string "sselog")) > ] > (const_string "ssemov"))) > (set (attr "memory") > @@ -3750,9 +3780,15 @@ (define_insn "*movhf_internal" > (eq_attr "alternative" "2") > (const_string "V4SF") > (eq_attr "alternative" "4,5,6,7") > - (const_string "TI") > + (if_then_else > + (match_test "TARGET_AVX512FP16") > + (const_string "HI") > + (const_string "TI")) > (eq_attr "alternative" "3") > - (const_string "SF") > + (if_then_else > + (match_test "TARGET_AVX512FP16") > + (const_string "HF") > + (const_string "SF")) > ] > (const_string "*")))]) > > @@ -4493,6 +4529,17 @@ (define_split > emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); > }) > > +(define_insn "extendhf<mode>2" > + [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v") > + (float_extend:MODEF > + (match_operand:HF 1 "nonimmediate_operand" "vm")))] > + "TARGET_AVX512FP16" > + "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}" > + [(set_attr "type" "ssecvt") > + (set_attr "prefix" "evex") > + (set_attr "mode" "<MODE>")]) > + > + > (define_expand "extend<mode>xf2" > [(set (match_operand:XF 0 "nonimmediate_operand") > (float_extend:XF (match_operand:MODEF 1 "general_operand")))] > @@ -4670,6 +4717,18 @@ (define_insn "truncxf<mode>2" > (symbol_ref "flag_unsafe_math_optimizations") > ] > (symbol_ref "true")))]) > + > +;; Conversion from {SF,DF}mode to HFmode. 
> + > +(define_insn "trunc<mode>hf2" > + [(set (match_operand:HF 0 "register_operand" "=v") > + (float_truncate:HF > + (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] > + "TARGET_AVX512FP16" > + "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}" > + [(set_attr "type" "ssecvt") > + (set_attr "prefix" "evex") > + (set_attr "mode" "HF")]) > > ;; Signed conversion to DImode. > > @@ -5046,6 +5105,16 @@ (define_insn "*float<SWI48:mode><MODEF:mode>2" > (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")] > (symbol_ref "true")))]) > > +(define_insn "float<floatunssuffix><mode>hf2" > + [(set (match_operand:HF 0 "register_operand" "=v") > + (any_float:HF > + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] > + "TARGET_AVX512FP16" > + "vcvt<floatsuffix>si2sh<rex64suffix>\t{%1, %d0|%d0, %1}" > + [(set_attr "type" "sseicvt") > + (set_attr "prefix" "evex") > + (set_attr "mode" "HF")]) > + > (define_insn "*floatdi<MODEF:mode>2_i387" > [(set (match_operand:MODEF 0 "register_operand" "=f") > (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))] > @@ -7627,12 +7696,12 @@ (define_expand "<insn>xf3" > "TARGET_80387") > > (define_expand "<insn><mode>3" > - [(set (match_operand:MODEF 0 "register_operand") > - (plusminus:MODEF > - (match_operand:MODEF 1 "register_operand") > - (match_operand:MODEF 2 "nonimmediate_operand")))] > + [(set (match_operand:MODEFH 0 "register_operand") > + (plusminus:MODEFH > + (match_operand:MODEFH 1 "register_operand") > + (match_operand:MODEFH 2 "nonimmediate_operand")))] > "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)) > - || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)") > + || SSE_FLOAT_MODE_SSEMATH_OR_HF_P (<MODE>mode)") > > ;; Multiply instructions > > @@ -8204,11 +8273,11 @@ (define_expand "mulxf3" > "TARGET_80387") > > (define_expand "mul<mode>3" > - [(set (match_operand:MODEF 0 "register_operand") > - (mult:MODEF (match_operand:MODEF 1 "register_operand") > - (match_operand:MODEF 2 "nonimmediate_operand")))] > + [(set 
(match_operand:MODEFH 0 "register_operand") > + (mult:MODEFH (match_operand:MODEFH 1 "register_operand") > + (match_operand:MODEFH 2 "nonimmediate_operand")))] > "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)) > - || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)") > + || SSE_FLOAT_MODE_SSEMATH_OR_HF_P (<MODE>mode)") > > ;; Divide instructions > > @@ -8221,11 +8290,11 @@ (define_expand "divxf3" > "TARGET_80387") > > (define_expand "div<mode>3" > - [(set (match_operand:MODEF 0 "register_operand") > - (div:MODEF (match_operand:MODEF 1 "register_operand") > - (match_operand:MODEF 2 "nonimmediate_operand")))] > + [(set (match_operand:MODEFH 0 "register_operand") > + (div:MODEFH (match_operand:MODEFH 1 "register_operand") > + (match_operand:MODEFH 2 "nonimmediate_operand")))] > "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)) > - || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" > + || SSE_FLOAT_MODE_SSEMATH_OR_HF_P (<MODE>mode)" > { > if (<MODE>mode == SFmode > && TARGET_SSE && TARGET_SSE_MATH > @@ -16312,6 +16381,22 @@ (define_insn "*fop_<mode>_comm" > (symbol_ref "true") > (symbol_ref "false"))))]) > > +(define_insn "*fop_hf_comm" > + [(set (match_operand:HF 0 "register_operand" "=v") > + (match_operator:HF 3 "binary_fp_operator" > + [(match_operand:HF 1 "nonimmediate_operand" "%v") > + (match_operand:HF 2 "nonimmediate_operand" "vm")]))] > + "TARGET_AVX512FP16 > + && COMMUTATIVE_ARITH_P (operands[3]) > + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" > + "* return output_387_binary_op (insn, operands);" > + [(set (attr "type") > + (if_then_else (match_operand:HF 3 "mult_operator") > + (const_string "ssemul") > + (const_string "sseadd"))) > + (set_attr "prefix" "evex") > + (set_attr "mode" "HF")]) > + > (define_insn "*rcpsf2_sse" > [(set (match_operand:SF 0 "register_operand" "=x,x,x") > (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")] > @@ -16385,6 +16470,22 @@ (define_insn "*fop_<mode>_1" > (symbol_ref "true") > (symbol_ref 
"false"))))]) > > +(define_insn "*fop_hf_1" > + [(set (match_operand:HF 0 "register_operand" "=v") > + (match_operator:HF 3 "binary_fp_operator" > + [(match_operand:HF 1 "nonimmediate_operand" "v") > + (match_operand:HF 2 "nonimmediate_operand" "vm")]))] > + "TARGET_AVX512FP16 > + && !COMMUTATIVE_ARITH_P (operands[3]) > + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" > + "* return output_387_binary_op (insn, operands);" > + [(set (attr "type") > + (if_then_else (match_operand:MODEF 3 "div_operator") > + (const_string "ssediv") > + (const_string "sseadd"))) > + (set_attr "prefix" "evex") > + (set_attr "mode" "<MODE>")]) > + > (define_insn "*fop_<X87MODEF:mode>_2_i387" > [(set (match_operand:X87MODEF 0 "register_operand" "=f") > (match_operator:X87MODEF 3 "binary_fp_operator" > @@ -19179,13 +19280,13 @@ (define_peephole2 > }) > > (define_expand "mov<mode>cc" > - [(set (match_operand:X87MODEF 0 "register_operand") > - (if_then_else:X87MODEF > + [(set (match_operand:X87MODEFH 0 "register_operand") > + (if_then_else:X87MODEFH > (match_operand 1 "comparison_operator") > - (match_operand:X87MODEF 2 "register_operand") > - (match_operand:X87MODEF 3 "register_operand")))] > + (match_operand:X87MODEFH 2 "register_operand") > + (match_operand:X87MODEFH 3 "register_operand")))] > "(TARGET_80387 && TARGET_CMOVE) > - || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" > + || SSE_FLOAT_MODE_SSEMATH_OR_HF_P (<MODE>mode)" > "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") > > (define_insn "*movxfcc_1" > @@ -19347,12 +19448,12 @@ (define_insn "<code><mode>3" > ;; presence of -0.0 and NaN. 
> > (define_insn "*ieee_s<ieee_maxmin><mode>3" > - [(set (match_operand:MODEF 0 "register_operand" "=x,v") > - (unspec:MODEF > - [(match_operand:MODEF 1 "register_operand" "0,v") > - (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")] > + [(set (match_operand:MODEFH 0 "register_operand" "=x,v") > + (unspec:MODEFH > + [(match_operand:MODEFH 1 "register_operand" "0,v") > + (match_operand:MODEFH 2 "nonimmediate_operand" "xm,vm")] > IEEE_MAXMIN))] > - "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" > + "SSE_FLOAT_MODE_SSEMATH_OR_HF_P (<MODE>mode)" > "@ > <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2} > v<ieee_maxmin><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index 7b8547bb1c3..ad366974b5b 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -1166,3 +1166,7 @@ Emit GNU_PROPERTY_X86_ISA_1_NEEDED GNU property. > mmwait > Target Mask(ISA2_MWAIT) Var(ix86_isa_flags2) Save > Support MWAIT and MONITOR built-in functions and code generation. > + > +mavx512fp16 > +Target Mask(ISA2_AVX512FP16) Var(ix86_isa_flags2) Save > +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and > AVX512FP16 built-in functions and code generation. > diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h > index f129de4bbe5..2421a78637b 100644 > --- a/gcc/config/i386/immintrin.h > +++ b/gcc/config/i386/immintrin.h > @@ -94,6 +94,10 @@ > > #include <avx512vp2intersectvlintrin.h> > > +#ifdef __SSE2__ > +#include <avx512fp16intrin.h> > +#endif > + > #include <shaintrin.h> > > #include <fmaintrin.h> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 32697e6117c..bb9f7ca956e 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -1393,6 +1393,7 @@ See RS/6000 and PowerPC Options. 
> -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol > -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol > -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol > +-mavx512fp16 @gol > -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol > -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol > -mkl -mwidekl @gol > @@ -31154,6 +31155,9 @@ preferred alignment to > @option{-mpreferred-stack-boundary=2}. > @itemx -mavx512bf16 > @opindex mavx512bf16 > @need 200 > +@itemx -mavx512fp16 > +@opindex mavx512fp16 > +@need 200 > @itemx -mgfni > @opindex mgfni > @need 200 > @@ -31232,9 +31236,9 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, > RDSEED, SGX, XOP, LWP, > XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, > GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, > AVX512BF16, > ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, > -UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI or CLDEMOTE > -extended instruction sets. Each has a corresponding @option{-mno-} option to > -disable use of these instructions. > +UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16 > +or CLDEMOTE extended instruction sets. Each has a corresponding > +@option{-mno-} option to disable use of these instructions. 
> > These extensions are also available as built-in functions: see > @ref{x86 Built-in Functions}, for details of the functions enabled and > diff --git a/gcc/testsuite/g++.dg/other/i386-2.C > b/gcc/testsuite/g++.dg/other/i386-2.C > index 62b2132957a..fba3d1ac684 100644 > --- a/gcc/testsuite/g++.dg/other/i386-2.C > +++ b/gcc/testsuite/g++.dg/other/i386-2.C > @@ -1,5 +1,5 @@ > /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ > -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 > -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp > -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr > -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 > -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma > -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq > -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig > -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk > -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ > +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 > -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp > -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr > -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 > -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma > -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq > -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig > -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk > -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ > > /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, > xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, > diff --git 
a/gcc/testsuite/g++.dg/other/i386-3.C > b/gcc/testsuite/g++.dg/other/i386-3.C > index 843aa2bdb2f..5cc0fa83457 100644 > --- a/gcc/testsuite/g++.dg/other/i386-3.C > +++ b/gcc/testsuite/g++.dg/other/i386-3.C > @@ -1,5 +1,5 @@ > /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ > -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx > -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm > -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr > -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 > -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma > -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq > -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig > -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk > -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ > +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx > -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm > -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr > -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 > -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma > -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq > -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig > -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk > -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ > > /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, > xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, > diff --git a/gcc/testsuite/g++.target/i386/float16-1.C > b/gcc/testsuite/g++.target/i386/float16-1.C > new file mode 100644 > index 00000000000..95d1ac27c4f > --- /dev/null > +++ 
b/gcc/testsuite/g++.target/i386/float16-1.C > @@ -0,0 +1,8 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mno-sse2" } */ > + > +_Float16/* { dg-error "does not name a type" } */ > +foo (_Float16 x) > +{ > + return x; > +} > diff --git a/gcc/testsuite/g++.target/i386/float16-2.C > b/gcc/testsuite/g++.target/i386/float16-2.C > new file mode 100644 > index 00000000000..99eb797eff1 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/float16-2.C > @@ -0,0 +1,14 @@ > +/* { dg-do assemble { target avx512fp16 } } */ > +/* { dg-options "-O2 -mavx512fp16" } */ > + > +union flt > +{ > + _Float16 flt; > + short s; > +}; > + > +_Float16 > +foo (union flt x) > +{ > + return x.flt; > +} > diff --git a/gcc/testsuite/g++.target/i386/float16-3.C > b/gcc/testsuite/g++.target/i386/float16-3.C > new file mode 100644 > index 00000000000..940878503f1 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/float16-3.C > @@ -0,0 +1,10 @@ > +/* { dg-do assemble { target avx512fp16 } } */ > +/* { dg-options "-O0 -mavx512fp16" } */ > + > +template <typename> void a(char *) {} > +char b, d; > +void c() > +{ > + a<unsigned char>(&d); > + a<_Float16>(&b); > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c > b/gcc/testsuite/gcc.target/i386/avx-1.c > index 6178e38ce02..f3676077743 100644 > --- a/gcc/testsuite/gcc.target/i386/avx-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx-1.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow > -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw" } */ > +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow > -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16" } */ > /* { dg-add-options bind_pic_locally } */ > > #include <mm_malloc.h> > diff --git a/gcc/testsuite/gcc.target/i386/avx-2.c > b/gcc/testsuite/gcc.target/i386/avx-2.c > index 986fbd819e4..1751c52565c 100644 > --- a/gcc/testsuite/gcc.target/i386/avx-2.c > +++ 
b/gcc/testsuite/gcc.target/i386/avx-2.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow > -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw" } */ > +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow > -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16" } */ > /* { dg-add-options bind_pic_locally } */ > > #include <mm_malloc.h> > diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h > b/gcc/testsuite/gcc.target/i386/avx512-check.h > index 0a377dba1d5..0ad9064f637 100644 > --- a/gcc/testsuite/gcc.target/i386/avx512-check.h > +++ b/gcc/testsuite/gcc.target/i386/avx512-check.h > @@ -87,6 +87,9 @@ main () > #ifdef AVX512VNNI > && (ecx & bit_AVX512VNNI) > #endif > +#ifdef AVX512FP16 > + && (edx & bit_AVX512FP16) > +#endif > #ifdef VAES > && (ecx & bit_VAES) > #endif > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-12a.c > b/gcc/testsuite/gcc.target/i386/avx512fp16-12a.c > new file mode 100644 > index 00000000000..88887556d68 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-12a.c > @@ -0,0 +1,21 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512fp16" } */ > + > +_Float16 > +__attribute__ ((noinline, noclone)) > +do_max (_Float16 __A, _Float16 __B) > +{ > + return __A > __B ? __A : __B; > +} > + > +_Float16 > +__attribute__ ((noinline, noclone)) > +do_min (_Float16 __A, _Float16 __B) > +{ > + return __A < __B ? __A : __B; > +} > + > +/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */ > +/* { dg-final { scan-assembler-times "vminsh\[ \\t\]" 1 } } */ > +/* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! 
ia32 } } } > } */ > +/* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */ > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-12b.c > b/gcc/testsuite/gcc.target/i386/avx512fp16-12b.c > new file mode 100644 > index 00000000000..c9e23bf95c2 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-12b.c > @@ -0,0 +1,27 @@ > +/* { dg-do run { target avx512fp16 } } */ > +/* { dg-options "-O2 -mavx512fp16" } */ > + > +#include <string.h> > + > +static void do_test (void); > + > +#define DO_TEST do_test > +#define AVX512FP16 > +#include "avx512-check.h" > +#include "avx512fp16-12a.c" > + > +static void > +do_test (void) > +{ > + _Float16 x = 0.1f; > + _Float16 y = -3.2f; > + _Float16 z; > + > + z = do_max (x, y); > + if (z != x) > + abort (); > + > + z = do_min (x, y); > + if (z != y) > + abort (); > +} > diff --git a/gcc/testsuite/gcc.target/i386/float16-3a.c > b/gcc/testsuite/gcc.target/i386/float16-3a.c > new file mode 100644 > index 00000000000..3846c8e9b6e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/float16-3a.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512fp16" } */ > + > +_Float16 > +foo (int x) > +{ > + return x; > +} > + > +/* { dg-final { scan-assembler-times "vcvtsi2shl\[ \t\]+\[^\n\r]*%xmm0" 1 } > } */ > diff --git a/gcc/testsuite/gcc.target/i386/float16-3b.c > b/gcc/testsuite/gcc.target/i386/float16-3b.c > new file mode 100644 > index 00000000000..247dd6e7e33 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/float16-3b.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512fp16" } */ > + > +_Float16 > +foo (unsigned int x) > +{ > + return x; > +} > + > +/* { dg-final { scan-assembler-times "vcvtusi2shl\[ \t\]+\[^\n\r]*%xmm0" 1 } > } */ > diff --git a/gcc/testsuite/gcc.target/i386/float16-4a.c > b/gcc/testsuite/gcc.target/i386/float16-4a.c > new file mode 100644 > index 00000000000..631082581f3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/float16-4a.c > @@ -0,0 
+1,10 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -mavx512fp16" } */ > + > +_Float16 > +foo (long long x) > +{ > + return x; > +} > + > +/* { dg-final { scan-assembler-times "vcvtsi2shq\[ \t\]+\[^\n\r]*%xmm0" 1 } > } */ > diff --git a/gcc/testsuite/gcc.target/i386/float16-4b.c > b/gcc/testsuite/gcc.target/i386/float16-4b.c > new file mode 100644 > index 00000000000..828d8530769 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/float16-4b.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -mavx512fp16" } */ > + > +_Float16 > +foo (unsigned long long x) > +{ > + return x; > +} > + > +/* { dg-final { scan-assembler-times "vcvtusi2shq\[ \t\]+\[^\n\r]*%xmm0" 1 } > } */ > diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc > b/gcc/testsuite/gcc.target/i386/funcspec-56.inc > index 79265c7c94f..8499fdf2db9 100644 > --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc > +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc > @@ -79,6 +79,7 @@ extern void test_hreset (void) > __attribute__((__target__("hreset"))); > extern void test_keylocker (void) > __attribute__((__target__("kl"))); > extern void test_widekl (void) > __attribute__((__target__("widekl"))); > extern void test_avxvnni (void) > __attribute__((__target__("avxvnni"))); > +extern void test_avx512fp16 (void) > __attribute__((__target__("avx512fp16"))); > > extern void test_no_sgx (void) > __attribute__((__target__("no-sgx"))); > extern void test_no_avx5124fmaps(void) > __attribute__((__target__("no-avx5124fmaps"))); > @@ -159,6 +160,7 @@ extern void test_no_hreset (void) > __attribute__((__target__("no-hreset"))); > extern void test_no_keylocker (void) > __attribute__((__target__("no-kl"))); > extern void test_no_widekl (void) > __attribute__((__target__("no-widekl"))); > extern void test_no_avxvnni (void) > __attribute__((__target__("no-avxvnni"))); > +extern void test_no_avx512fp16 (void) > 
__attribute__((__target__("no-avx512fp16"))); > > extern void test_arch_nocona (void) > __attribute__((__target__("arch=nocona"))); > extern void test_arch_core2 (void) > __attribute__((__target__("arch=core2"))); > diff --git a/gcc/testsuite/gcc.target/i386/pr54855-12.c > b/gcc/testsuite/gcc.target/i386/pr54855-12.c > new file mode 100644 > index 00000000000..2f8af392c83 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr54855-12.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512fp16" } */ > +/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */ > +/* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */ > +/* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } > } */ > + > +#include <immintrin.h> > + > +_Float16 > +foo (_Float16 x, _Float16 y) > +{ > + x = x > y ? x : y; > + return x; > +} > diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c > b/gcc/testsuite/gcc.target/i386/sse-13.c > index 7029771334b..f5f5c113612 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-13.c > +++ b/gcc/testsuite/gcc.target/i386/sse-13.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a > -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi > -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw > -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha > -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw > -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw > -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx > -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd > -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl > -mavxvnni" } */ > +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a > -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm 
-mlzcnt -mbmi > -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw > -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha > -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw > -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw > -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx > -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd > -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl > -mavxvnni -mavx512fp16" } */ > /* { dg-add-options bind_pic_locally } */ > > #include <mm_malloc.h> > diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c > b/gcc/testsuite/gcc.target/i386/sse-14.c > index 4ce0ffffaf3..747d504cedb 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-14.c > +++ b/gcc/testsuite/gcc.target/i386/sse-14.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a > -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi > -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw > -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha > -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl > -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw > -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni > -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect > -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl > -mavxvnni" } */ > +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a > -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi > -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw > -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha > -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq 
-mavx512bw -mavx512vl > -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw > -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni > -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect > -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl > -mavxvnni -mavx512fp16" } */ > /* { dg-add-options bind_pic_locally } */ > > #include <mm_malloc.h> > diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c > b/gcc/testsuite/gcc.target/i386/sse-22.c > index 6e8b6f3fa1b..33411969901 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-22.c > +++ b/gcc/testsuite/gcc.target/i386/sse-22.c > @@ -103,7 +103,7 @@ > > > #ifndef DIFFERENT_PRAGMAS > -#pragma GCC target > ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") > +#pragma GCC target > ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") > #endif > > /* Following intrinsics require immediate arguments. 
They > @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) > > /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ > #ifdef DIFFERENT_PRAGMAS > -#pragma GCC target > ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") > +#pragma GCC target > ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") > #endif > #include <immintrin.h> > test_1 (_cvtss_sh, unsigned short, float, 1) > diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c > b/gcc/testsuite/gcc.target/i386/sse-23.c > index 7faa053ace8..86590ca5ffb 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-23.c > +++ b/gcc/testsuite/gcc.target/i386/sse-23.c > @@ -708,6 +708,6 @@ > #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) > __builtin_ia32_vpclmulqdq_v2di(A, B, 1) > #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) > __builtin_ia32_vpclmulqdq_v8di(A, B, 1) > > -#pragma GCC target > ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") > +#pragma GCC target > 
("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") > > #include <x86intrin.h> > diff --git a/gcc/testsuite/lib/target-supports.exp > b/gcc/testsuite/lib/target-supports.exp > index 42ac9d0ac1a..10765365d7b 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -3020,7 +3020,7 @@ proc check_effective_target_has_q_floating_suffix { } { > > proc check_effective_target_float16 {} { > return [check_no_compiler_messages_nocache float16 object { > - _Float16 x; > + _Float16 foo (_Float16 x) { return x; } > } [add_options_for_float16 ""]] > } > > @@ -8714,6 +8714,17 @@ proc check_prefer_avx128 { } { > } > > > +# Return 1 if avx512fp16 instructions can be compiled. > + > +proc check_effective_target_avx512fp16 { } { > + return [check_no_compiler_messages avx512fp16 object { > + void foo (void) > + { > + asm volatile ("vmovw %edi, %xmm0"); > + } > + } "-O2 -mavx512fp16" ] > +} > + > # Return 1 if avx512f instructions can be compiled. > > proc check_effective_target_avx512f { } { > -- > 2.18.1 >