On Mon, Aug 2, 2021 at 2:31 PM liuhongt <hongtao....@intel.com> wrote: > > gcc/ChangeLog: > > * config/i386/i386-modes.def (FLOAT_MODE): Define ieee HFmode. > * config/i386/i386.c (enum x86_64_reg_class): Add > X86_64_SSEHF_CLASS. > (merge_classes): Handle X86_64_SSEHF_CLASS. > (examine_argument): Ditto. > (construct_container): Ditto. > (classify_argument): Ditto, and set HFmode/HCmode to > X86_64_SSEHF_CLASS. > (function_value_32): Return _FLoat16/Complex Float16 by > %xmm0. > (function_value_64): Return _Float16/Complex Float16 by SSE > register. > (ix86_print_operand): Handle CONST_DOUBLE HFmode. > (ix86_secondary_reload): Require gpr as intermediate register > to store _Float16 from sse register when sse4 is not > available. > (ix86_libgcc_floating_mode_supported_p): Enable _FLoat16 under > sse2. > (ix86_scalar_mode_supported_p): Ditto. > (TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P): Defined. > * config/i386/i386.h (VALID_SSE2_REG_MODE): Add HFmode. > (VALID_INT_MODE_P): Add HFmode and HCmode. > * config/i386/i386.md (*pushhf_rex64): New define_insn. > (*pushhf): Ditto. > (*movhf_internal): Ditto. > * doc/extend.texi (Half-Precision Floating Point): Documemt > _Float16 for x86. > * emit-rtl.c (validate_subreg): Allow (subreg:SI (reg:HF) 0) > which is used by extract_bit_field but not backends. > > gcc/lto/ChangeLog: > > * lto-lang.c (lto_type_for_mode): Return float16_type_node > when mode == TYPE_MODE (float16_type_node). > > gcc/testsuite/ChangeLog > > * gcc.target/i386/sse2-float16-1.c: New test. > * gcc.target/i386/sse2-float16-2.c: Ditto. > * gcc.target/i386/sse2-float16-3.c: Ditto. > * gcc.target/i386/float16-5.c: Ditto. 
> --- > gcc/config/i386/i386-modes.def | 1 + > gcc/config/i386/i386.c | 91 +++++++++++++- > gcc/config/i386/i386.h | 3 +- > gcc/config/i386/i386.md | 118 +++++++++++++++++- > gcc/doc/extend.texi | 13 ++ > gcc/emit-rtl.c | 5 + > gcc/lto/lto-lang.c | 3 + > gcc/testsuite/gcc.target/i386/float16-5.c | 12 ++ > .../gcc.target/i386/sse2-float16-1.c | 8 ++ > .../gcc.target/i386/sse2-float16-2.c | 16 +++ > .../gcc.target/i386/sse2-float16-3.c | 12 ++ > 11 files changed, 274 insertions(+), 8 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/float16-5.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse2-float16-3.c > > diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def > index 4e7014be034..9232f59a925 100644 > --- a/gcc/config/i386/i386-modes.def > +++ b/gcc/config/i386/i386-modes.def > @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see > > FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format); > FLOAT_MODE (TF, 16, ieee_quad_format); > +FLOAT_MODE (HF, 2, ieee_half_format); > > /* In ILP32 mode, XFmode has size 12 and alignment 4. > In LP64 mode, XFmode has size and alignment 16. */ > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index ff96134fb37..7979e240426 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -387,6 +387,7 @@ enum x86_64_reg_class > X86_64_INTEGER_CLASS, > X86_64_INTEGERSI_CLASS, > X86_64_SSE_CLASS, > + X86_64_SSEHF_CLASS, > X86_64_SSESF_CLASS, > X86_64_SSEDF_CLASS, > X86_64_SSEUP_CLASS, > @@ -2023,8 +2024,10 @@ merge_classes (enum x86_64_reg_class class1, enum > x86_64_reg_class class2) > return X86_64_MEMORY_CLASS; > > /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. 
*/ > - if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) > - || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) > + if ((class1 == X86_64_INTEGERSI_CLASS > + && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS)) > + || (class2 == X86_64_INTEGERSI_CLASS > + && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS))) > return X86_64_INTEGERSI_CLASS; > if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS > || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) > @@ -2178,6 +2181,8 @@ classify_argument (machine_mode mode, const_tree type, > /* The partial classes are now full classes. */ > if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) > subclasses[0] = X86_64_SSE_CLASS; > + if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2) > + subclasses[0] = X86_64_SSE_CLASS; > if (subclasses[0] == X86_64_INTEGERSI_CLASS > && !((bit_offset % 64) == 0 && bytes == 4)) > subclasses[0] = X86_64_INTEGER_CLASS; > @@ -2350,6 +2355,12 @@ classify_argument (machine_mode mode, const_tree type, > gcc_unreachable (); > case E_CTImode: > return 0; > + case E_HFmode: > + if (!(bit_offset % 64)) > + classes[0] = X86_64_SSEHF_CLASS; > + else > + classes[0] = X86_64_SSE_CLASS; > + return 1; > case E_SFmode: > if (!(bit_offset % 64)) > classes[0] = X86_64_SSESF_CLASS; > @@ -2367,6 +2378,15 @@ classify_argument (machine_mode mode, const_tree type, > classes[0] = X86_64_SSE_CLASS; > classes[1] = X86_64_SSEUP_CLASS; > return 2; > + case E_HCmode: > + classes[0] = X86_64_SSE_CLASS; > + if (!(bit_offset % 64)) > + return 1; > + else > + { > + classes[1] = X86_64_SSEHF_CLASS; > + return 2; > + } > case E_SCmode: > classes[0] = X86_64_SSE_CLASS; > if (!(bit_offset % 64)) > @@ -2481,6 +2501,7 @@ examine_argument (machine_mode mode, const_tree type, > int in_return, > (*int_nregs)++; > break; > case X86_64_SSE_CLASS: > + case X86_64_SSEHF_CLASS: > case X86_64_SSESF_CLASS: > case X86_64_SSEDF_CLASS: > 
(*sse_nregs)++; > @@ -2580,13 +2601,14 @@ construct_container (machine_mode mode, machine_mode > orig_mode, > > /* First construct simple cases. Avoid SCmode, since we want to use > single register to pass this type. */ > - if (n == 1 && mode != SCmode) > + if (n == 1 && mode != SCmode && mode != HCmode) > switch (regclass[0]) > { > case X86_64_INTEGER_CLASS: > case X86_64_INTEGERSI_CLASS: > return gen_rtx_REG (mode, intreg[0]); > case X86_64_SSE_CLASS: > + case X86_64_SSEHF_CLASS: > case X86_64_SSESF_CLASS: > case X86_64_SSEDF_CLASS: > if (mode != BLKmode) > @@ -2683,6 +2705,14 @@ construct_container (machine_mode mode, machine_mode > orig_mode, > GEN_INT (i*8)); > intreg++; > break; > + case X86_64_SSEHF_CLASS: > + exp [nexps++] > + = gen_rtx_EXPR_LIST (VOIDmode, > + gen_rtx_REG (HFmode, > + GET_SSE_REGNO (sse_regno)), > + GEN_INT (i*8)); > + sse_regno++; > + break; > case X86_64_SSESF_CLASS: > exp [nexps++] > = gen_rtx_EXPR_LIST (VOIDmode, > @@ -3903,6 +3933,19 @@ function_value_32 (machine_mode orig_mode, > machine_mode mode, > /* Most things go in %eax. */ > regno = AX_REG; > > + /* Return _Float16/_Complex _Foat16 by sse register. */ > + if (mode == HFmode) > + regno = FIRST_SSE_REG; > + if (mode == HCmode) > + { > + rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1)); > + XVECEXP (ret, 0, 0) > + = gen_rtx_EXPR_LIST (VOIDmode, > + gen_rtx_REG (SImode, FIRST_SSE_REG), > + GEN_INT (0)); > + return ret; > + } > + > /* Override FP return register with %xmm0 for local functions when > SSE math is enabled or for functions with sseregparm attribute. 
*/ > if ((fn || fntype) && (mode == SFmode || mode == DFmode)) > @@ -3939,6 +3982,8 @@ function_value_64 (machine_mode orig_mode, machine_mode > mode, > > switch (mode) > { > + case E_HFmode: > + case E_HCmode: > case E_SFmode: > case E_SCmode: > case E_DFmode: > @@ -13411,6 +13456,15 @@ ix86_print_operand (FILE *file, rtx x, int code) > (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P'); > } > > + else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode) > + { > + long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x), > + REAL_MODE_FORMAT (HFmode)); > + if (ASSEMBLER_DIALECT == ASM_ATT) > + putc ('$', file); > + fprintf (file, "0x%04x", (unsigned int) l); > + } > + > else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode) > { > long l; > @@ -18928,6 +18982,16 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t > rclass, > return NO_REGS; > } > > + /* Require movement to gpr, and then store to memory. */ > + if (mode == HFmode > + && !TARGET_SSE4_1 > + && SSE_CLASS_P (rclass) > + && !in_p && MEM_P (x)) > + { > + sri->extra_cost = 1; > + return GENERAL_REGS; > + } > + > /* This condition handles corner case where an expression involving > pointers gets vectorized. We're trying to use the address of a > stack slot as a vector initializer. > @@ -21555,10 +21619,27 @@ ix86_scalar_mode_supported_p (scalar_mode mode) > return default_decimal_float_supported_p (); > else if (mode == TFmode) > return true; > + else if (mode == HFmode && TARGET_SSE2) > + return true; > else > return default_scalar_mode_supported_p (mode); > } > > +/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE > + if MODE is HFmode, and punt to the generic implementation otherwise. */ > + > +static bool > +ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode) > +{ > + /* NB: Always return TRUE for HFmode so that the _Float16 type will > + be defined by the C front-end for AVX512FP16 intrinsics. 
We will > + issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't > + enabled. */ > + return ((mode == HFmode && TARGET_SSE2) > + ? true > + : default_libgcc_floating_mode_supported_p (mode)); > +} > + > /* Implements target hook vector_mode_supported_p. */ > static bool > ix86_vector_mode_supported_p (machine_mode mode) > @@ -23820,6 +23901,10 @@ ix86_run_selftests (void) > #undef TARGET_SCALAR_MODE_SUPPORTED_P > #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p > > +#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P > +#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \ > +ix86_libgcc_floating_mode_supported_p > + > #undef TARGET_VECTOR_MODE_SUPPORTED_P > #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p > > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 0c2c93daf32..b1e66ee192e 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -1018,7 +1018,7 @@ extern const char *host_detect_local_cpu (int argc, > const char **argv); > #define VALID_SSE2_REG_MODE(MODE) \ > ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ > || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \ > - || (MODE) == V2DImode || (MODE) == DFmode) > + || (MODE) == V2DImode || (MODE) == DFmode || (MODE) == HFmode) > > #define VALID_SSE_REG_MODE(MODE) \ > ((MODE) == V1TImode || (MODE) == TImode \ > @@ -1047,6 +1047,7 @@ extern const char *host_detect_local_cpu (int argc, > const char **argv); > || (MODE) == CQImode || (MODE) == CHImode \ > || (MODE) == CSImode || (MODE) == CDImode \ > || (MODE) == SDmode || (MODE) == DDmode \ > + || (MODE) == HFmode || (MODE) == HCmode \ > || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \ > || (TARGET_64BIT \ > && ((MODE) == TImode || (MODE) == CTImode \ > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 8b809c49fe0..d475347172d 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -1222,6 
+1222,9 @@ (define_mode_iterator MODEF [SF DF]) > ;; All x87 floating point modes > (define_mode_iterator X87MODEF [SF DF XF]) > > +;; All x87 floating point modes plus HF > +(define_mode_iterator X87MODEFH [SF DF XF HF]) > + > ;; All SSE floating point modes > (define_mode_iterator SSEMODEF [SF DF TF]) > (define_mode_attr ssevecmodef [(SF "V4SF") (DF "V2DF") (TF "TF")]) > @@ -3130,6 +3133,32 @@ (define_split > operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); > }) > > +(define_insn "*pushhf_rex64" > + [(set (match_operand:HF 0 "push_operand" "=X,X") > + (match_operand:HF 1 "nonmemory_no_elim_operand" "r,x"))] > + "TARGET_64BIT" > +{ > + /* Anything else should be already split before reg-stack. */ > + gcc_assert (which_alternative == 0); > + return "push{q}\t%q1"; > +} > + [(set_attr "isa" "*,sse4") > + (set_attr "type" "push,multi") > + (set_attr "mode" "DI,TI")]) > + > +(define_insn "*pushhf" > + [(set (match_operand:HF 0 "push_operand" "=X,X") > + (match_operand:HF 1 "general_no_elim_operand" "rmF,x"))] > + "!TARGET_64BIT" > +{ > + /* Anything else should be already split before reg-stack. */ > + gcc_assert (which_alternative == 0); > + return "push{l}\t%k1"; > +} > + [(set_attr "isa" "*,sse4") > + (set_attr "type" "push,multi") > + (set_attr "mode" "SI,TI")]) > + > (define_insn "*pushsf_rex64" > [(set (match_operand:SF 0 "push_operand" "=X,X,X") > (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))] > @@ -3158,10 +3187,11 @@ (define_insn "*pushsf" > (set_attr "unit" "i387,*,*") > (set_attr "mode" "SF,SI,SF")]) > > +(define_mode_iterator MODESH [SF HF]) > ;; %%% Kill this when call knows how to work this out. 
> (define_split > - [(set (match_operand:SF 0 "push_operand") > - (match_operand:SF 1 "any_fp_register_operand"))] > + [(set (match_operand:MODESH 0 "push_operand") > + (match_operand:MODESH 1 "any_fp_register_operand"))] > "reload_completed" > [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) > (set (match_dup 0) (match_dup 1))] > @@ -3209,8 +3239,8 @@ (define_expand "movtf" > "ix86_expand_move (TFmode, operands); DONE;") > > (define_expand "mov<mode>" > - [(set (match_operand:X87MODEF 0 "nonimmediate_operand") > - (match_operand:X87MODEF 1 "general_operand"))] > + [(set (match_operand:X87MODEFH 0 "nonimmediate_operand") > + (match_operand:X87MODEFH 1 "general_operand"))] > "" > "ix86_expand_move (<MODE>mode, operands); DONE;") > > @@ -3646,6 +3676,86 @@ (define_insn "*movsf_internal" > ] > (const_string "*")))]) > > +(define_insn "*movhf_internal" > + [(set (match_operand:HF 0 "nonimmediate_operand" > + "=?r,?m,v,v,?r,m,?v,v") > + (match_operand:HF 1 "general_operand" > + "rmF,rF,C,v, v,v, r,m"))] > + "!(MEM_P (operands[0]) && MEM_P (operands[1])) > + && (lra_in_progress > + || reload_completed > + || !CONST_DOUBLE_P (operands[1]) > + || (TARGET_SSE && TARGET_SSE_MATH > + && standard_sse_constant_p (operands[1], HFmode) == 1) > + || memory_operand (operands[0], HFmode))" > +{ > + switch (get_attr_type (insn)) > + { > + case TYPE_IMOV: > + return "mov{w}\t{%1, %0|%0, %1}"; > + > + case TYPE_SSELOG1: > + return standard_sse_constant_opcode (insn, operands); > + > + case TYPE_SSEMOV: > + return ix86_output_ssemov (insn, operands); > + > + case TYPE_SSELOG: > + if (SSE_REG_P (operands[0])) > + return MEM_P (operands[1]) > + ? "pinsrw\t{$0, %1, %0|%0, %1, 0}" > + : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}"; > + else > + return MEM_P (operands[1]) > + ? 
"pextrw\t{$0, %1, %0|%0, %1, 0}" > + : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}"; > + > + default: > + gcc_unreachable (); > + } > +} > + [(set (attr "isa") > + (cond [(eq_attr "alternative" "2,3,4,6,7") > + (const_string "sse2") > + (eq_attr "alternative" "5") > + (const_string "sse4") > + ] > + (const_string "*"))) > + (set (attr "type") > + (cond [(eq_attr "alternative" "0,1") > + (const_string "imov") > + (eq_attr "alternative" "2") > + (const_string "sselog1") > + (eq_attr "alternative" "4,5,6,7") > + (const_string "sselog") > + ] > + (const_string "ssemov"))) > + (set (attr "memory") > + (cond [(eq_attr "alternative" "4,6") > + (const_string "none") > + (eq_attr "alternative" "5") > + (const_string "store") > + (eq_attr "alternative" "7") > + (const_string "load") > + ] > + (const_string "*"))) > + (set (attr "prefix") > + (cond [(eq_attr "alternative" "0,1") > + (const_string "orig") > + ] > + (const_string "maybe_vex"))) > + (set (attr "mode") > + (cond [(eq_attr "alternative" "0,1") > + (const_string "HI") > + (eq_attr "alternative" "2") > + (const_string "V4SF") > + (eq_attr "alternative" "4,5,6,7") > + (const_string "TI") > + (eq_attr "alternative" "3") > + (const_string "SF") > + ] > + (const_string "*")))]) > + > (define_split > [(set (match_operand 0 "any_fp_register_operand") > (match_operand 1 "memory_operand"))] > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index b83cd4919bb..f42fd633725 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -1102,6 +1102,7 @@ typedef _Complex float __attribute__((mode(IC))) > _Complex_ibm128; > @section Half-Precision Floating Point > @cindex half-precision floating point > @cindex @code{__fp16} data type > +@cindex @code{__Float16} data type > > On ARM and AArch64 targets, GCC supports half-precision (16-bit) floating > point via the @code{__fp16} type defined in the ARM C Language Extensions. > @@ -1150,6 +1151,18 @@ calls. 
> It is recommended that portable code use the @code{_Float16} type defined > by ISO/IEC TS 18661-3:2015. @xref{Floating Types}. > > +On x86 targets with @code{target("sse2")} and above, GCC supports > half-precision > +(16-bit) floating point via the @code{_Float16} type which is defined by > +18661-3:2015. For C++, x86 provide a builtin type named @code{_Float16} > +which contains same data format as C. > + > +Without @option{-mavx512fp16}, @code{_Float16} type is storage only, all > +operations will be emulated by software emulation and the @code{float} > +instructions. The default behavior for @code{FLT_EVAL_METHOD} is to keep > +the intermediate result of the operation as 32-bit precision. This may lead > +to inconsistent behavior between software emulation and AVX512-FP16 > +instructions. > + > @node Decimal Float > @section Decimal Floating Types > @cindex decimal floating types
Ping. I'd like to ask for approval of the code below, which touches the generic part. It starts here: > diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c > index ff3b4449b37..775ee397836 100644 > --- a/gcc/emit-rtl.c > +++ b/gcc/emit-rtl.c > @@ -928,6 +928,11 @@ validate_subreg (machine_mode omode, machine_mode imode, > fix them all. */ > if (omode == word_mode) > ; > + /* ???Similarly like (subreg:DI (reg:SF), also allow (subreg:SI (reg:HF)) > + here. Though extract_bit_field is the culprit here, not the backends. > */ > + else if (known_gt (regsize, osize) && known_gt (osize, isize) > + && FLOAT_MODE_P (imode) && INTEGRAL_MODE_P (omode)) > + ; > /* ??? Similarly, e.g. with (subreg:DF (reg:TI)). Though store_bit_field > is the culprit here, and not the backends. */ > else if (known_ge (osize, regsize) && known_ge (isize, osize)) and ends here. > diff --git a/gcc/lto/lto-lang.c b/gcc/lto/lto-lang.c > index c13c7e45ac1..92f499643b5 100644 > --- a/gcc/lto/lto-lang.c > +++ b/gcc/lto/lto-lang.c > @@ -992,6 +992,9 @@ lto_type_for_mode (machine_mode mode, int unsigned_p) > return unsigned_p ? 
unsigned_intTI_type_node : intTI_type_node; > #endif > > + if (float16_type_node && mode == TYPE_MODE (float16_type_node)) > + return float16_type_node; > + > if (mode == TYPE_MODE (float_type_node)) > return float_type_node; > > diff --git a/gcc/testsuite/gcc.target/i386/float16-5.c > b/gcc/testsuite/gcc.target/i386/float16-5.c > new file mode 100644 > index 00000000000..ebc0af1490b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/float16-5.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-msse2 -O2" } */ > +_Float16 > +foo (int a) > +{ > + union { > + int a; > + _Float16 b; > + }c; > + c.a = a; > + return c.b; > +} > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-1.c > b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c > new file mode 100644 > index 00000000000..1b645eb499d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-1.c > @@ -0,0 +1,8 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mno-sse2" } */ > + > +_Float16/* { dg-error "is not supported on this target" } */ > +foo (_Float16 x) /* { dg-error "is not supported on this target" } */ > +{ > + return x; > +} > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-2.c > b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c > new file mode 100644 > index 00000000000..3da7683fc31 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-2.c > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse2 -mno-avx512f" } */ > + > +union flt > +{ > + _Float16 flt; > + short s; > +}; > + > +_Float16 > +foo (union flt x) > +{ > + return x.flt; > +} > + > +/* { dg-final { scan-assembler {(?n)pinsrw[\t ].*%xmm0} } } */ > diff --git a/gcc/testsuite/gcc.target/i386/sse2-float16-3.c > b/gcc/testsuite/gcc.target/i386/sse2-float16-3.c > new file mode 100644 > index 00000000000..60ff9d4ab80 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse2-float16-3.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse2 -mno-avx512f" 
} */ > + > +#include<complex.h> > + > +_Complex _Float16 > +foo (_Complex _Float16 x) > +{ > + return x; > +} > + > +/* { dg-final { scan-assembler {(?n)movd[\t ].*%xmm0} } } */ > -- > 2.27.0 > -- BR, Hongtao