On Fri, Mar 22, 2019 at 9:41 AM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > The following patch adds forgotten avx512f fma intrinsics for masked scalar > operations. > > Bootstrapped/regtested on x86_64-linux and i686-linux (on skylake-avx512), > ok for trunk?
There are several possibilities to mark the 1st and the 2nd operand of the fma pattern as commutative ("%"). However, there are already existing patterns without a commutative operand, so this improvement could eventually be submitted as a follow-on patch. So, LGTM for the whole thing. Thanks, Uros. > 2019-03-22 Jakub Jelinek <ja...@redhat.com> > > PR target/89784 > * config/i386/i386.c (enum ix86_builtins): Remove > IX86_BUILTIN_VFMSUBSD3_MASK3 and IX86_BUILTIN_VFMSUBSS3_MASK3. > * config/i386/i386-builtin.def (__builtin_ia32_vfmaddsd3_mask, > __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, > __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, > __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, > __builtin_ia32_vfmsubss3_mask3): New builtins. > * config/i386/sse.md (avx512f_vmfmadd_<mode>_mask<round_name>, > avx512f_vmfmadd_<mode>_mask3<round_name>, > avx512f_vmfmadd_<mode>_maskz_1<round_name>, > *avx512f_vmfmsub_<mode>_mask<round_name>, > avx512f_vmfmsub_<mode>_mask3<round_name>, > *avx512f_vmfmsub_<mode>_maskz_1<round_name>, > *avx512f_vmfnmadd_<mode>_mask<round_name>, > *avx512f_vmfnmadd_<mode>_mask3<round_name>, > *avx512f_vmfnmadd_<mode>_maskz_1<round_name>, > *avx512f_vmfnmsub_<mode>_mask<round_name>, > *avx512f_vmfnmsub_<mode>_mask3<round_name>, > *avx512f_vmfnmsub_<mode>_maskz_1<round_name>): New define_insns. > (avx512f_vmfmadd_<mode>_maskz<round_expand_name>): New define_expand. 
> * config/i386/avx512fintrin.h (_mm_mask_fmadd_sd, _mm_mask_fmadd_ss, > _mm_mask3_fmadd_sd, _mm_mask3_fmadd_ss, _mm_maskz_fmadd_sd, > _mm_maskz_fmadd_ss, _mm_mask_fmsub_sd, _mm_mask_fmsub_ss, > _mm_mask3_fmsub_sd, _mm_mask3_fmsub_ss, _mm_maskz_fmsub_sd, > _mm_maskz_fmsub_ss, _mm_mask_fnmadd_sd, _mm_mask_fnmadd_ss, > _mm_mask3_fnmadd_sd, _mm_mask3_fnmadd_ss, _mm_maskz_fnmadd_sd, > _mm_maskz_fnmadd_ss, _mm_mask_fnmsub_sd, _mm_mask_fnmsub_ss, > _mm_mask3_fnmsub_sd, _mm_mask3_fnmsub_ss, _mm_maskz_fnmsub_sd, > _mm_maskz_fnmsub_ss, _mm_mask_fmadd_round_sd, _mm_mask_fmadd_round_ss, > _mm_mask3_fmadd_round_sd, _mm_mask3_fmadd_round_ss, > _mm_maskz_fmadd_round_sd, _mm_maskz_fmadd_round_ss, > _mm_mask_fmsub_round_sd, _mm_mask_fmsub_round_ss, > _mm_mask3_fmsub_round_sd, _mm_mask3_fmsub_round_ss, > _mm_maskz_fmsub_round_sd, _mm_maskz_fmsub_round_ss, > _mm_mask_fnmadd_round_sd, _mm_mask_fnmadd_round_ss, > _mm_mask3_fnmadd_round_sd, _mm_mask3_fnmadd_round_ss, > _mm_maskz_fnmadd_round_sd, _mm_maskz_fnmadd_round_ss, > _mm_mask_fnmsub_round_sd, _mm_mask_fnmsub_round_ss, > _mm_mask3_fnmsub_round_sd, _mm_mask3_fnmsub_round_ss, > _mm_maskz_fnmsub_round_sd, _mm_maskz_fnmsub_round_ss): New intrinsics. > > * gcc.target/i386/sse-13.c (__builtin_ia32_vfmaddsd3_mask, > __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, > __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, > __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, > __builtin_ia32_vfmsubss3_mask3): Define. > * gcc.target/i386/sse-23.c (__builtin_ia32_vfmaddsd3_mask, > __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, > __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, > __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, > __builtin_ia32_vfmsubss3_mask3): Define. 
> * gcc.target/i386/avx-1.c (__builtin_ia32_vfmaddsd3_mask, > __builtin_ia32_vfmaddsd3_mask3, __builtin_ia32_vfmaddsd3_maskz, > __builtin_ia32_vfmsubsd3_mask3, __builtin_ia32_vfmaddss3_mask, > __builtin_ia32_vfmaddss3_mask3, __builtin_ia32_vfmaddss3_maskz, > __builtin_ia32_vfmsubss3_mask3): Define. > * gcc.target/i386/sse-14.c: Add tests for > _mm_mask{,3,z}_f{,n}m{add,sub}_round_s{s,d} builtins. > * gcc.target/i386/sse-22.c: Likewise. > > 2019-03-22 Hongtao Liu <hongtao....@intel.com> > > * gcc.target/i386/avx512f-vfmaddXXXsd-1.c (avx512f_test): Add tests > for _mm_mask{,3,z}_*. > * gcc.target/i386/avx512f-vfmaddXXXss-1.c (avx512f_test): Likewise. > * gcc.target/i386/avx512f-vfmsubXXXsd-1.c (avx512f_test): Likewise. > * gcc.target/i386/avx512f-vfmsubXXXss-1.c (avx512f_test): Likewise. > * gcc.target/i386/avx512f-vfnmaddXXXsd-1.c (avx512f_test): Likewise. > * gcc.target/i386/avx512f-vfnmaddXXXss-1.c (avx512f_test): Likewise. > * gcc.target/i386/avx512f-vfnmsubXXXsd-1.c (avx512f_test): Likewise. > * gcc.target/i386/avx512f-vfnmsubXXXss-1.c (avx512f_test): Likewise. > * gcc.target/i386/avx512f-vfmaddXXXsd-2.c: New test. > * gcc.target/i386/avx512f-vfmaddXXXss-2.c: New test. > * gcc.target/i386/avx512f-vfmsubXXXsd-2.c: New test. > * gcc.target/i386/avx512f-vfmsubXXXss-2.c: New test. > * gcc.target/i386/avx512f-vfnmaddXXXsd-2.c: New test. > * gcc.target/i386/avx512f-vfnmaddXXXss-2.c: New test. > * gcc.target/i386/avx512f-vfnmsubXXXsd-2.c: New test. > * gcc.target/i386/avx512f-vfnmsubXXXss-2.c: New test. 
> > --- gcc/config/i386/i386.c.jj 2019-03-19 08:25:24.225118967 +0100 > +++ gcc/config/i386/i386.c 2019-03-21 17:08:40.840369883 +0100 > @@ -30524,8 +30524,6 @@ enum ix86_builtins > IX86_BUILTIN_GATHERSIV8SI, > IX86_BUILTIN_GATHERDIV4SI, > IX86_BUILTIN_GATHERDIV8SI, > - IX86_BUILTIN_VFMSUBSD3_MASK3, > - IX86_BUILTIN_VFMSUBSS3_MASK3, > IX86_BUILTIN_GATHER3SIV8SF, > IX86_BUILTIN_GATHER3SIV4SF, > IX86_BUILTIN_GATHER3SIV4DF, > --- gcc/config/i386/i386-builtin.def.jj 2019-03-07 20:09:39.690753839 +0100 > +++ gcc/config/i386/i386-builtin.def 2019-03-21 16:06:40.262060495 +0100 > @@ -2827,6 +2827,14 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_ > BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, > "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, > (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v2df_round, > "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, > (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT) > BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v4sf_round, > "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, > (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_mask_round, > "__builtin_ia32_vfmaddsd3_mask", IX86_BUILTIN_VFMADDSD3_MASK, UNKNOWN, (int) > V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, 0, > CODE_FOR_avx512f_vmfmadd_v2df_mask3_round, "__builtin_ia32_vfmaddsd3_mask3", > IX86_BUILTIN_VFMADDSD3_MASK3, UNKNOWN, (int) > V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, 0, > CODE_FOR_avx512f_vmfmadd_v2df_maskz_round, "__builtin_ia32_vfmaddsd3_maskz", > IX86_BUILTIN_VFMADDSD3_MASKZ, UNKNOWN, (int) > V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, 0, > CODE_FOR_avx512f_vmfmsub_v2df_mask3_round, "__builtin_ia32_vfmsubsd3_mask3", > IX86_BUILTIN_VFMSUBSD3_MASK3, UNKNOWN, (int) > 
V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask_round, > "__builtin_ia32_vfmaddss3_mask", IX86_BUILTIN_VFMADDSS3_MASK, UNKNOWN, (int) > V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, 0, > CODE_FOR_avx512f_vmfmadd_v4sf_mask3_round, "__builtin_ia32_vfmaddss3_mask3", > IX86_BUILTIN_VFMADDSS3_MASK3, UNKNOWN, (int) > V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, 0, > CODE_FOR_avx512f_vmfmadd_v4sf_maskz_round, "__builtin_ia32_vfmaddss3_maskz", > IX86_BUILTIN_VFMADDSS3_MASKZ, UNKNOWN, (int) > V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, 0, > CODE_FOR_avx512f_vmfmsub_v4sf_mask3_round, "__builtin_ia32_vfmsubss3_mask3", > IX86_BUILTIN_VFMSUBSS3_MASK3, UNKNOWN, (int) > V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, 0, > CODE_FOR_avx512f_fmaddsub_v8df_mask_round, > "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, > UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, 0, > CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, > "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, > UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, 0, > CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, > "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, > UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) > --- gcc/config/i386/sse.md.jj 2019-03-14 09:55:49.291867091 +0100 > +++ gcc/config/i386/sse.md 2019-03-21 16:59:54.367800032 +0100 > @@ -4643,6 +4643,252 @@ (define_insn "*fmai_fnmsub_<mode><round_ > [(set_attr "type" "ssemuladd") > (set_attr "mode" "<MODE>")]) > > +(define_insn "avx512f_vmfmadd_<mode>_mask<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v,v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (match_operand:VF_128 1 "register_operand" "0,0") > + (match_operand:VF_128 2 
"<round_nimm_predicate>" > "<round_constraint>,v") > + (match_operand:VF_128 3 "<round_nimm_predicate>" > "v,<round_constraint>")) > + (match_dup 1) > + (match_operand:QI 4 "register_operand" "Yk,Yk")) > + (match_dup 1) > + (const_int 1)))] > + "TARGET_AVX512F" > + "@ > + vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, > %<iptr>3, %<iptr>2<round_op5>} > + vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, > %<iptr>2, %<iptr>3<round_op5>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (match_operand:VF_128 1 "register_operand" "v") > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>") > + (match_operand:VF_128 3 "register_operand" "0")) > + (match_dup 3) > + (match_operand:QI 4 "register_operand" "Yk")) > + (match_dup 3) > + (const_int 1)))] > + "TARGET_AVX512F" > + "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, > %<iptr>3, %<iptr>2<round_op5>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>" > + [(match_operand:VF_128 0 "register_operand") > + (match_operand:VF_128 1 "<round_expand_nimm_predicate>") > + (match_operand:VF_128 2 "<round_expand_nimm_predicate>") > + (match_operand:VF_128 3 "<round_expand_nimm_predicate>") > + (match_operand:QI 4 "register_operand")] > + "TARGET_AVX512F" > +{ > + emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> ( > + operands[0], operands[1], operands[2], operands[3], > + CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); > + DONE; > +}) > + > +(define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v,v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + 
(match_operand:VF_128 1 "register_operand" "0,0") > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>,v") > + (match_operand:VF_128 3 "<round_nimm_predicate>" > "v,<round_constraint>")) > + (match_operand:VF_128 4 "const0_operand" "C,C") > + (match_operand:QI 5 "register_operand" "Yk,Yk")) > + (match_dup 1) > + (const_int 1)))] > + "TARGET_AVX512F" > + "@ > + vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, > %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>} > + vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, > %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v,v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (match_operand:VF_128 1 "register_operand" "0,0") > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>,v") > + (neg:VF_128 > + (match_operand:VF_128 3 "<round_nimm_predicate>" > "v,<round_constraint>"))) > + (match_dup 1) > + (match_operand:QI 4 "register_operand" "Yk,Yk")) > + (match_dup 1) > + (const_int 1)))] > + "TARGET_AVX512F" > + "@ > + vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, > %<iptr>3, %<iptr>2<round_op5>} > + vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, > %<iptr>2, %<iptr>3<round_op5>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (match_operand:VF_128 1 "register_operand" "v") > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>") > + (neg:VF_128 > + (match_operand:VF_128 3 "register_operand" "0"))) > + (match_dup 3) > + (match_operand:QI 4 "register_operand" "Yk")) > + (match_dup 3) > + 
(const_int 1)))] > + "TARGET_AVX512F" > + "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, > %<iptr>3, %<iptr>2<round_op5>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v,v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (match_operand:VF_128 1 "register_operand" "0,0") > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>,v") > + (neg:VF_128 > + (match_operand:VF_128 3 "<round_nimm_predicate>" > "v,<round_constraint>"))) > + (match_operand:VF_128 4 "const0_operand" "C,C") > + (match_operand:QI 5 "register_operand" "Yk,Yk")) > + (match_dup 1) > + (const_int 1)))] > + "TARGET_AVX512F" > + "@ > + vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, > %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>} > + vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, > %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v,v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (neg:VF_128 > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>,v")) > + (match_operand:VF_128 1 "register_operand" "0,0") > + (match_operand:VF_128 3 "<round_nimm_predicate>" > "v,<round_constraint>")) > + (match_dup 1) > + (match_operand:QI 4 "register_operand" "Yk,Yk")) > + (match_dup 1) > + (const_int 1)))] > + "TARGET_AVX512F" > + "@ > + vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, > %<iptr>3, %<iptr>2<round_op5>} > + vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, > %<iptr>2, %<iptr>3<round_op5>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn 
"*avx512f_vmfnmadd_<mode>_mask3<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (neg:VF_128 > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>")) > + (match_operand:VF_128 1 "register_operand" "v") > + (match_operand:VF_128 3 "register_operand" "0")) > + (match_dup 3) > + (match_operand:QI 4 "register_operand" "Yk")) > + (match_dup 3) > + (const_int 1)))] > + "TARGET_AVX512F" > + "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, > %<iptr>3, %<iptr>2<round_op5>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v,v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (neg:VF_128 > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>,v")) > + (match_operand:VF_128 1 "register_operand" "0,0") > + (match_operand:VF_128 3 "<round_nimm_predicate>" > "v,<round_constraint>")) > + (match_operand:VF_128 4 "const0_operand" "C,C") > + (match_operand:QI 5 "register_operand" "Yk,Yk")) > + (match_dup 1) > + (const_int 1)))] > + "TARGET_AVX512F" > + "@ > + vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, > %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>} > + vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, > %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v,v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (neg:VF_128 > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>,v")) > + (match_operand:VF_128 1 "register_operand" "0,0") > + (neg:VF_128 > + (match_operand:VF_128 3 "<round_nimm_predicate>" > "v,<round_constraint>"))) 
> + (match_dup 1) > + (match_operand:QI 4 "register_operand" "Yk,Yk")) > + (match_dup 1) > + (const_int 1)))] > + "TARGET_AVX512F" > + "@ > + vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, > %<iptr>3, %<iptr>2<round_op5>} > + vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, > %<iptr>2, %<iptr>3<round_op5>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "avx512f_vmfnmsub_<mode>_mask3<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (neg:VF_128 > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>")) > + (match_operand:VF_128 1 "register_operand" "v") > + (neg:VF_128 > + (match_operand:VF_128 3 "register_operand" "0"))) > + (match_dup 3) > + (match_operand:QI 4 "register_operand" "Yk")) > + (match_dup 3) > + (const_int 1)))] > + "TARGET_AVX512F" > + "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, > %<iptr>3, %<iptr>2<round_op5>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>" > + [(set (match_operand:VF_128 0 "register_operand" "=v,v") > + (vec_merge:VF_128 > + (vec_merge:VF_128 > + (fma:VF_128 > + (neg:VF_128 > + (match_operand:VF_128 2 "<round_nimm_predicate>" > "<round_constraint>,v")) > + (match_operand:VF_128 1 "register_operand" "0,0") > + (neg:VF_128 > + (match_operand:VF_128 3 "<round_nimm_predicate>" > "v,<round_constraint>"))) > + (match_operand:VF_128 4 "const0_operand" "C,C") > + (match_operand:QI 5 "register_operand" "Yk,Yk")) > + (match_dup 1) > + (const_int 1)))] > + "TARGET_AVX512F" > + "@ > + vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, > %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>} > + vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, > %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}" > + [(set_attr "type" 
"ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > ;; FMA4 floating point scalar intrinsics. These write the > ;; entire destination register, with the high-order elements zeroed. > > --- gcc/config/i386/avx512fintrin.h.jj 2019-03-07 20:09:39.692753807 +0100 > +++ gcc/config/i386/avx512fintrin.h 2019-03-21 16:03:20.356277036 +0100 > @@ -11562,6 +11562,608 @@ _mm_fnmsub_round_ss (__m128 __W, __m128 > (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R) > #endif > > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, > + (__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, > + (__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, > + (__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, > + (__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B) 
> +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, > + (__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, > + (__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, > + (__v2df) __A, > + -(__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, > + (__v4sf) __A, > + -(__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) > +{ > + return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, > + (__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) > +{ > + return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, > + (__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fmsub_sd (__mmask8 __U, 
__m128d __W, __m128d __A, __m128d __B) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, > + (__v2df) __A, > + -(__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, > + (__v4sf) __A, > + -(__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, > + -(__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, > + -(__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, > + -(__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, > + -(__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) > +_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, > + -(__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, > + -(__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, > + -(__v2df) __A, > + -(__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, > + -(__v4sf) __A, > + -(__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U) > +{ > + return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, > + -(__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U) > +{ > + return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, > + -(__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128d > 
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, > + -(__v2df) __A, > + -(__v2df) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, > + -(__v4sf) __A, > + -(__v4sf) __B, > + (__mmask8) __U, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +#ifdef __OPTIMIZE__ > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, > + (__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, > + (__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 > __U, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, > + (__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, > + (__v4sf) __A, > + (__v4sf) __B, 
> + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d > __B, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, > + (__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, > + (__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, > + (__v2df) __A, > + -(__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, > + (__v4sf) __A, > + -(__v4sf) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 > __U, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, > + (__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) 
__W, > + (__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d > __B, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, > + (__v2df) __A, > + -(__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, > + (__v4sf) __A, > + -(__v4sf) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d > __B, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, > + -(__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, > + -(__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 > __U, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, > + -(__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U, > + const int __R) > +{ > + return 
(__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, > + -(__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d > __B, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, > + -(__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, > + -(__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d > __B, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W, > + -(__v2df) __A, > + -(__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W, > + -(__v4sf) __A, > + -(__v4sf) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 > __U, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, > + -(__v2df) __A, > + (__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 
__B, __mmask8 __U, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, > + -(__v4sf) __A, > + (__v4sf) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d > __B, > + const int __R) > +{ > + return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W, > + -(__v2df) __A, > + -(__v2df) __B, > + (__mmask8) __U, __R); > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B, > + const int __R) > +{ > + return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W, > + -(__v4sf) __A, > + -(__v4sf) __B, > + (__mmask8) __U, __R); > +} > +#else > +#define _mm_mask_fmadd_round_sd(A, U, B, C, R) \ > + (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R) > + > +#define _mm_mask_fmadd_round_ss(A, U, B, C, R) \ > + (__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R) > + > +#define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \ > + (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R) > + > +#define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \ > + (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R) > + > +#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ > + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R) > + > +#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ > + (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R) > + > +#define _mm_mask_fmsub_round_sd(A, U, B, C, R) \ > + (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R) > + > +#define _mm_mask_fmsub_round_ss(A, U, B, C, R) \ > + (__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R) > + > +#define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \ > + (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R) > + > +#define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \ > + (__m128)
__builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R) > + > +#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ > + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R) > + > +#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ > + (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R) > + > +#define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \ > + (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R) > + > +#define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \ > + (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R) > + > +#define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \ > + (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R) > + > +#define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \ > + (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R) > + > +#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ > + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R) > + > +#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ > + (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R) > + > +#define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \ > + (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R) > + > +#define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \ > + (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R) > + > +#define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \ > + (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R) > + > +#define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \ > + (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R) > + > +#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ > + (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R) > + > +#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ > + (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R) > +#endif > + > #ifdef __OPTIMIZE__ > extern __inline int > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > --- gcc/testsuite/gcc.target/i386/sse-13.c.jj 2019-01-17 13:19:59.630491720 > +0100 > +++
gcc/testsuite/gcc.target/i386/sse-13.c 2019-03-21 18:55:25.337713848 > +0100 > @@ -390,6 +390,14 @@ > #define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) > __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8) > #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) > __builtin_ia32_vpermilpd512_mask(A, 1, C, D) > #define __builtin_ia32_vpermilps512_mask(A, E, C, D) > __builtin_ia32_vpermilps512_mask(A, 1, C, D) > +#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) > __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) > __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) > __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) > __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) > __builtin_ia32_vfmaddss3_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) > __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) > __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) > __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8) > > /* avx512erintrin.h */ > #define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, > B, C, 8) > --- gcc/testsuite/gcc.target/i386/sse-14.c.jj 2019-01-17 13:19:59.562492823 > +0100 > +++ gcc/testsuite/gcc.target/i386/sse-14.c 2019-03-21 19:07:11.272369562 > +0100 > @@ -534,6 +534,30 @@ test_4 (_mm512_maskz_fnmsub_round_pd, __ > test_4 (_mm512_maskz_fnmsub_round_ps, __m512, __mmask16, __m512, __m512, > __m512, 9) > test_4 (_mm512_maskz_ternarylogic_epi32, __m512i, __mmask16, __m512i, > __m512i, __m512i, 1) > test_4 (_mm512_maskz_ternarylogic_epi64, __m512i, __mmask8, __m512i, > __m512i, __m512i, 1) > +test_4 (_mm_mask_fmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, > __m128d, 9) > +test_4 (_mm_mask_fmadd_round_ss, 
__m128, __m128, __mmask8, __m128, __m128, 9) > +test_4 (_mm_mask3_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, > __mmask8, 9) > +test_4 (_mm_mask3_fmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, > 9) > +test_4 (_mm_maskz_fmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, > __m128d, 9) > +test_4 (_mm_maskz_fmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, > 9) > +test_4 (_mm_mask_fmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, > __m128d, 9) > +test_4 (_mm_mask_fmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) > +test_4 (_mm_mask3_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, > __mmask8, 9) > +test_4 (_mm_mask3_fmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, > 9) > +test_4 (_mm_maskz_fmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, > __m128d, 9) > +test_4 (_mm_maskz_fmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, > 9) > +test_4 (_mm_mask_fnmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, > __m128d, 9) > +test_4 (_mm_mask_fnmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, > 9) > +test_4 (_mm_mask3_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, > __mmask8, 9) > +test_4 (_mm_mask3_fnmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, > 9) > +test_4 (_mm_maskz_fnmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, > __m128d, 9) > +test_4 (_mm_maskz_fnmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, > 9) > +test_4 (_mm_mask_fnmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, > __m128d, 9) > +test_4 (_mm_mask_fnmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, > 9) > +test_4 (_mm_mask3_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, > __mmask8, 9) > +test_4 (_mm_mask3_fnmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, > 9) > +test_4 (_mm_maskz_fnmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, > __m128d, 9) > +test_4 (_mm_maskz_fnmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, > 9) > test_4v (_mm512_mask_i32scatter_epi32, void *, 
__mmask16, __m512i, __m512i, > 1) > test_4v (_mm512_mask_i32scatter_epi64, void *, __mmask8, __m256i, __m512i, 1) > test_4v (_mm512_mask_i32scatter_pd, void *, __mmask8, __m256i, __m512d, 1) > --- gcc/testsuite/gcc.target/i386/sse-22.c.jj 2019-01-17 13:19:59.572492661 > +0100 > +++ gcc/testsuite/gcc.target/i386/sse-22.c 2019-03-21 19:21:00.811042629 > +0100 > @@ -633,6 +633,30 @@ test_4 (_mm512_maskz_fnmsub_round_pd, __ > test_4 (_mm512_maskz_fnmsub_round_ps, __m512, __mmask16, __m512, __m512, > __m512, 9) > test_4 (_mm512_maskz_ternarylogic_epi32, __m512i, __mmask16, __m512i, > __m512i, __m512i, 1) > test_4 (_mm512_maskz_ternarylogic_epi64, __m512i, __mmask8, __m512i, > __m512i, __m512i, 1) > +test_4 (_mm_mask_fmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, > __m128d, 9) > +test_4 (_mm_mask_fmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) > +test_4 (_mm_mask3_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, > __mmask8, 9) > +test_4 (_mm_mask3_fmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, > 9) > +test_4 (_mm_maskz_fmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, > __m128d, 9) > +test_4 (_mm_maskz_fmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, > 9) > +test_4 (_mm_mask_fmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, > __m128d, 9) > +test_4 (_mm_mask_fmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9) > +test_4 (_mm_mask3_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, > __mmask8, 9) > +test_4 (_mm_mask3_fmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, > 9) > +test_4 (_mm_maskz_fmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, > __m128d, 9) > +test_4 (_mm_maskz_fmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, > 9) > +test_4 (_mm_mask_fnmadd_round_sd, __m128d, __m128d, __mmask8, __m128d, > __m128d, 9) > +test_4 (_mm_mask_fnmadd_round_ss, __m128, __m128, __mmask8, __m128, __m128, > 9) > +test_4 (_mm_mask3_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, > __mmask8, 9) > +test_4 
(_mm_mask3_fnmadd_round_ss, __m128, __m128, __m128, __m128, __mmask8, > 9) > +test_4 (_mm_maskz_fnmadd_round_sd, __m128d, __mmask8, __m128d, __m128d, > __m128d, 9) > +test_4 (_mm_maskz_fnmadd_round_ss, __m128, __mmask8, __m128, __m128, __m128, > 9) > +test_4 (_mm_mask_fnmsub_round_sd, __m128d, __m128d, __mmask8, __m128d, > __m128d, 9) > +test_4 (_mm_mask_fnmsub_round_ss, __m128, __m128, __mmask8, __m128, __m128, > 9) > +test_4 (_mm_mask3_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, > __mmask8, 9) > +test_4 (_mm_mask3_fnmsub_round_ss, __m128, __m128, __m128, __m128, __mmask8, > 9) > +test_4 (_mm_maskz_fnmsub_round_sd, __m128d, __mmask8, __m128d, __m128d, > __m128d, 9) > +test_4 (_mm_maskz_fnmsub_round_ss, __m128, __mmask8, __m128, __m128, __m128, > 9) > test_4v (_mm512_mask_i32scatter_epi32, void *, __mmask16, __m512i, __m512i, > 1) > test_4v (_mm512_mask_i32scatter_epi64, void *, __mmask8, __m256i, __m512i, 1) > test_4v (_mm512_mask_i32scatter_pd, void *, __mmask8, __m256i, __m512d, 1) > --- gcc/testsuite/gcc.target/i386/sse-23.c.jj 2019-01-17 13:19:59.605492126 > +0100 > +++ gcc/testsuite/gcc.target/i386/sse-23.c 2019-03-21 19:20:06.577913267 > +0100 > @@ -389,6 +389,14 @@ > #define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) > __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8) > #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) > __builtin_ia32_vpermilpd512_mask(A, 1, C, D) > #define __builtin_ia32_vpermilps512_mask(A, E, C, D) > __builtin_ia32_vpermilps512_mask(A, 1, C, D) > +#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) > __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) > __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) > __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) > __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) > 
__builtin_ia32_vfmaddss3_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) > __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) > __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) > __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8) > > /* avx512pfintrin.h */ > #define __builtin_ia32_gatherpfdps(A, B, C, D, E) > __builtin_ia32_gatherpfdps(A, B, C, 1, _MM_HINT_T0) > --- gcc/testsuite/gcc.target/i386/avx-1.c.jj 2019-01-17 13:19:59.630491720 > +0100 > +++ gcc/testsuite/gcc.target/i386/avx-1.c 2019-03-21 22:48:49.417925947 > +0100 > @@ -373,6 +373,14 @@ > #define __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, E) > __builtin_ia32_vfnmsubps512_maskz(A, B, C, D, 8) > #define __builtin_ia32_vpermilpd512_mask(A, E, C, D) > __builtin_ia32_vpermilpd512_mask(A, 1, C, D) > #define __builtin_ia32_vpermilps512_mask(A, E, C, D) > __builtin_ia32_vpermilps512_mask(A, 1, C, D) > +#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) > __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) > __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) > __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) > __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) > __builtin_ia32_vfmaddss3_mask(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) > __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 8) > +#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) > __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 8) > +#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) > __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 8) > > /* avx512erintrin.h */ > #define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, > B, C, 8) > --- 
gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c.jj 2016-05-22 > 12:20:31.115669754 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-1.c 2019-03-22 > 09:28:16.211207123 +0100 > @@ -1,13 +1,26 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmadd...sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > +/* { dg-final { scan-assembler-times "vfmadd231sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfmadd...sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ > /* { dg-final { scan-assembler-times "vfmadd...sd\[ > \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfmadd...sd\[ > \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfmadd231sd\[ > \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfmadd...sd\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ > \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > volatile __m128d a, b, c; > +volatile __mmask8 m; > > void extern > avx512f_test (void) > { > + a = _mm_mask_fmadd_sd (a, m, b, c); > + c = _mm_mask3_fmadd_sd (a, b, c, m); > + a = _mm_maskz_fmadd_sd (m, a, b, c); > a = _mm_fmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | > _MM_FROUND_NO_EXC); > + a = _mm_mask_fmadd_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | > _MM_FROUND_NO_EXC); > + c = _mm_mask3_fmadd_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | > _MM_FROUND_NO_EXC); > + a = _mm_maskz_fmadd_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | > _MM_FROUND_NO_EXC); > } > --- gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c.jj 2019-03-22 > 09:28:16.211207123 +0100 > +++ 
gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-2.c 2019-03-22 > 09:28:16.211207123 +0100 > @@ -0,0 +1,94 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-check.h" > + > +#include "avx512f-helper.h" > + > +#define SIZE (128 / 64) > +#include "avx512f-mask-type.h" > + > +static void > +calc_add (double *s1, double *s2, double *s3, double* r) > +{ > + r[0] = s1[0] * s2[0] + s3[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +static void > +calc_add_3 (double *s1, double *s2, double *s3, double* r) > +{ > + r[0] = s2[0] * s3[0] + s1[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +void > +avx512f_test (void) > +{ > + int i, sign; > + union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; > + MASK_TYPE mask = MASK_VALUE; > + double res_ref1[SIZE], res_ref2[SIZE]; > + > + sign = -1; > + for (i = 0; i < SIZE; i++) > + { > + src1.a[i] = DEFAULT_VALUE; > + src2.a[i] = 56.78 * (i + 1) * sign; > + src3.a[i] = 90.12 * (i + 2) * sign; > + sign = sign * -1; > + } > + for (i = 0; i < SIZE; i++) > + { > + res1.a[i] = DEFAULT_VALUE; > + res2.a[i] = DEFAULT_VALUE; > + res5.a[i] = DEFAULT_VALUE; > + res6.a[i] = DEFAULT_VALUE; > + } > + > + calc_add (src1.a, src2.a, src3.a, res_ref1); > + calc_add_3(src1.a, src2.a, src3.a, res_ref2); > + > + res1.x = _mm_mask_fmadd_sd (src1.x, mask, src2.x, src3.x); > + res2.x = _mm_mask3_fmadd_sd (src2.x, src3.x, src1.x, mask); > + res3.x = _mm_maskz_fmadd_sd (mask, src1.x, src2.x, src3.x); > + res4.x = _mm_fmadd_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); > + res5.x = _mm_mask_fmadd_round_sd (src1.x, mask, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + res6.x = _mm_mask3_fmadd_round_sd (src2.x, src3.x, src1.x, mask, > _MM_FROUND_NO_EXC); > + res7.x = _mm_maskz_fmadd_round_sd (mask, src1.x, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + > + if (check_union128d (res4, res_ref1)) > + 
abort(); > + > + MASK_ZERO (d) (res_ref1, mask, 1); > + if (check_union128d (res3, res_ref1)) > + abort (); > + > + MASK_ZERO (d) (res_ref1, mask, 1); > + if (check_union128d (res7, res_ref1)) > + abort (); > + > + MASK_MERGE (d) (res_ref2, mask, 1); > + if (check_union128d (res2, res_ref2)) > + abort (); > + > + MASK_MERGE (d) (res_ref2, mask, 1); > + if (check_union128d (res6, res_ref2)) > + abort (); > + > + MASK_MERGE (d) (res_ref1, mask, 1); > + if (check_union128d (res1, res_ref1)) > + abort (); > + > + MASK_MERGE (d) (res_ref1, mask, 1); > + if (check_union128d (res5, res_ref1)) > + abort (); > +} > + > --- gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c.jj 2016-05-22 > 12:20:12.960915693 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-1.c 2019-03-22 > 09:28:16.217207025 +0100 > @@ -1,13 +1,26 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmadd...ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > +/* { dg-final { scan-assembler-times "vfmadd231ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfmadd...ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ > /* { dg-final { scan-assembler-times "vfmadd...ss\[ > \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfmadd...ss\[ > \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfmadd231ss\[ > \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfmadd...ss\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ > \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > volatile __m128 a, b, c; > +volatile __mmask8 m; > > void extern > avx512f_test (void) > { > + a = 
_mm_mask_fmadd_ss (a, m, b, c); > + c = _mm_mask3_fmadd_ss (a, b, c, m); > + a = _mm_maskz_fmadd_ss (m, a, b, c); > a = _mm_fmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | > _MM_FROUND_NO_EXC); > + a = _mm_mask_fmadd_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | > _MM_FROUND_NO_EXC); > + c = _mm_mask3_fmadd_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | > _MM_FROUND_NO_EXC); > + a = _mm_maskz_fmadd_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | > _MM_FROUND_NO_EXC); > } > --- gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c.jj 2019-03-22 > 09:28:16.217207025 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-2.c 2019-03-22 > 09:28:16.217207025 +0100 > @@ -0,0 +1,94 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-check.h" > + > +#include "avx512f-helper.h" > + > +#define SIZE (128 / 32) > +#include "avx512f-mask-type.h" > + > +static void > +calc_add (float *s1, float *s2, float *s3, float* r) > +{ > + r[0] = s1[0] * s2[0] + s3[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +static void > +calc_add_3 (float *s1, float *s2, float *s3, float* r) > +{ > + r[0] = s2[0] * s3[0] + s1[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +void > +avx512f_test (void) > +{ > + int i, sign; > + union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; > + MASK_TYPE mask = MASK_VALUE; > + float res_ref1[SIZE], res_ref2[SIZE]; > + > + sign = -1; > + for (i = 0; i < SIZE; i++) > + { > + src1.a[i] = DEFAULT_VALUE; > + src2.a[i] = 56.78 * (i + 1) * sign; > + src3.a[i] = 90.12 * (i + 2) * sign; > + sign = sign * -1; > + } > + for (i = 0; i < SIZE; i++) > + { > + res1.a[i] = DEFAULT_VALUE; > + res2.a[i] = DEFAULT_VALUE; > + res5.a[i] = DEFAULT_VALUE; > + res6.a[i] = DEFAULT_VALUE; > + } > + > + calc_add (src1.a, src2.a, src3.a, res_ref1); > + calc_add_3(src1.a, src2.a, src3.a, res_ref2); > + > + res1.x = 
_mm_mask_fmadd_ss (src1.x, mask, src2.x, src3.x); > + res2.x = _mm_mask3_fmadd_ss (src2.x, src3.x, src1.x, mask); > + res3.x = _mm_maskz_fmadd_ss (mask, src1.x, src2.x, src3.x); > + res4.x = _mm_fmadd_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); > + res5.x = _mm_mask_fmadd_round_ss (src1.x, mask, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + res6.x = _mm_mask3_fmadd_round_ss (src2.x, src3.x, src1.x, mask, > _MM_FROUND_NO_EXC); > + res7.x = _mm_maskz_fmadd_round_ss (mask, src1.x, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + > + if (check_union128 (res4, res_ref1)) > + abort(); > + > + MASK_ZERO () (res_ref1, mask, 1); > + if (check_union128 (res3, res_ref1)) > + abort (); > + > + MASK_ZERO () (res_ref1, mask, 1); > + if (check_union128 (res7, res_ref1)) > + abort (); > + > + MASK_MERGE () (res_ref2, mask, 1); > + if (check_union128 (res2, res_ref2)) > + abort (); > + > + MASK_MERGE () (res_ref2, mask, 1); > + if (check_union128 (res6, res_ref2)) > + abort (); > + > + MASK_MERGE () (res_ref1, mask, 1); > + if (check_union128 (res1, res_ref1)) > + abort (); > + > + MASK_MERGE () (res_ref1, mask, 1); > + if (check_union128 (res5, res_ref1)) > + abort (); > +} > + > --- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c.jj 2016-05-22 > 12:20:19.794823115 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-1.c 2019-03-22 > 09:28:16.227206861 +0100 > @@ -1,13 +1,26 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > +/* { dg-final { scan-assembler-times "vfmsub231sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfmsub...sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ > /* { dg-final { scan-assembler-times "vfmsub...sd\[ > \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { 
scan-assembler-times "vfmsub...sd\[ > \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfmsub231sd\[ > \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfmsub...sd\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ > \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > volatile __m128d a, b, c; > +volatile __mmask8 m; > > void extern > avx512f_test (void) > { > + a = _mm_mask_fmsub_sd (a, m, b, c); > + c = _mm_mask3_fmsub_sd (a, b, c, m); > + a = _mm_maskz_fmsub_sd (m, a, b, c); > a = _mm_fmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | > _MM_FROUND_NO_EXC); > + a = _mm_mask_fmsub_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | > _MM_FROUND_NO_EXC); > + c = _mm_mask3_fmsub_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | > _MM_FROUND_NO_EXC); > + a = _mm_maskz_fmsub_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | > _MM_FROUND_NO_EXC); > } > --- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c.jj 2019-03-22 > 09:28:16.227206861 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-2.c 2019-03-22 > 09:28:16.227206861 +0100 > @@ -0,0 +1,94 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-check.h" > + > +#include "avx512f-helper.h" > + > +#define SIZE (128 / 64) > +#include "avx512f-mask-type.h" > + > +static void > +calc_add (double *s1, double *s2, double *s3, double* r) > +{ > + r[0] = s1[0] * s2[0] - s3[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +static void > +calc_add_3 (double *s1, double *s2, double *s3, double* r) > +{ > + r[0] = s2[0] * s3[0] - s1[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +void > +avx512f_test (void) > +{ > + int i, sign; > + union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, 
src3; > + MASK_TYPE mask = MASK_VALUE; > + double res_ref1[SIZE], res_ref2[SIZE]; > + > + sign = -1; > + for (i = 0; i < SIZE; i++) > + { > + src1.a[i] = DEFAULT_VALUE; > + src2.a[i] = 56.78 * (i + 1) * sign; > + src3.a[i] = 90.12 * (i + 2) * sign; > + sign = sign * -1; > + } > + for (i = 0; i < SIZE; i++) > + { > + res1.a[i] = DEFAULT_VALUE; > + res2.a[i] = DEFAULT_VALUE; > + res5.a[i] = DEFAULT_VALUE; > + res6.a[i] = DEFAULT_VALUE; > + } > + > + calc_add (src1.a, src2.a, src3.a, res_ref1); > + calc_add_3(src1.a, src2.a, src3.a, res_ref2); > + > + res1.x = _mm_mask_fmsub_sd (src1.x, mask, src2.x, src3.x); > + res2.x = _mm_mask3_fmsub_sd (src2.x, src3.x, src1.x, mask); > + res3.x = _mm_maskz_fmsub_sd (mask, src1.x, src2.x, src3.x); > + res4.x = _mm_fmsub_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); > + res5.x = _mm_mask_fmsub_round_sd (src1.x, mask, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + res6.x = _mm_mask3_fmsub_round_sd (src2.x, src3.x, src1.x, mask, > _MM_FROUND_NO_EXC); > + res7.x = _mm_maskz_fmsub_round_sd (mask, src1.x, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + > + if (check_union128d (res4, res_ref1)) > + abort(); > + > + MASK_ZERO (d) (res_ref1, mask, 1); > + if (check_union128d (res3, res_ref1)) > + abort (); > + > + MASK_ZERO (d) (res_ref1, mask, 1); > + if (check_union128d (res7, res_ref1)) > + abort (); > + > + MASK_MERGE (d) (res_ref2, mask, 1); > + if (check_union128d (res2, res_ref2)) > + abort (); > + > + MASK_MERGE (d) (res_ref2, mask, 1); > + if (check_union128d (res6, res_ref2)) > + abort (); > + > + MASK_MERGE (d) (res_ref1, mask, 1); > + if (check_union128d (res1, res_ref1)) > + abort (); > + > + MASK_MERGE (d) (res_ref1, mask, 1); > + if (check_union128d (res5, res_ref1)) > + abort (); > +} > + > --- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c.jj 2016-05-22 > 12:20:04.885025095 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-1.c 2019-03-22 > 09:28:16.228206844 +0100 > @@ -1,13 +1,26 @@ > /* { dg-do compile } 
*/ > /* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > +/* { dg-final { scan-assembler-times "vfmsub231ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfmsub...ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ > /* { dg-final { scan-assembler-times "vfmsub...ss\[ > \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfmsub...ss\[ > \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfmsub231ss\[ > \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfmsub...ss\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ > \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > volatile __m128 a, b, c; > +volatile __mmask8 m; > > void extern > avx512f_test (void) > { > + a = _mm_mask_fmsub_ss (a, m, b, c); > + c = _mm_mask3_fmsub_ss (a, b, c, m); > + a = _mm_maskz_fmsub_ss (m, a, b, c); > a = _mm_fmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | > _MM_FROUND_NO_EXC); > + a = _mm_mask_fmsub_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | > _MM_FROUND_NO_EXC); > + c = _mm_mask3_fmsub_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | > _MM_FROUND_NO_EXC); > + a = _mm_maskz_fmsub_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | > _MM_FROUND_NO_EXC); > } > --- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c.jj 2019-03-22 > 09:28:16.228206844 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c 2019-03-22 > 09:28:16.228206844 +0100 > @@ -0,0 +1,94 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-check.h" > + > +#include 
"avx512f-helper.h" > + > +#define SIZE (128 / 32) > +#include "avx512f-mask-type.h" > + > +static void > +calc_add (float *s1, float *s2, float *s3, float* r) > +{ > + r[0] = s1[0] * s2[0] - s3[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +static void > +calc_add_3 (float *s1, float *s2, float *s3, float* r) > +{ > + r[0] = s2[0] * s3[0] - s1[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +void > +avx512f_test (void) > +{ > + int i, sign; > + union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; > + MASK_TYPE mask = MASK_VALUE; > + float res_ref1[SIZE], res_ref2[SIZE]; > + > + sign = -1; > + for (i = 0; i < SIZE; i++) > + { > + src1.a[i] = DEFAULT_VALUE; > + src2.a[i] = 56.78 * (i + 1) * sign; > + src3.a[i] = 90.12 * (i + 2) * sign; > + sign = sign * -1; > + } > + for (i = 0; i < SIZE; i++) > + { > + res1.a[i] = DEFAULT_VALUE; > + res2.a[i] = DEFAULT_VALUE; > + res5.a[i] = DEFAULT_VALUE; > + res6.a[i] = DEFAULT_VALUE; > + } > + > + calc_add (src1.a, src2.a, src3.a, res_ref1); > + calc_add_3(src1.a, src2.a, src3.a, res_ref2); > + > + res1.x = _mm_mask_fmsub_ss (src1.x, mask, src2.x, src3.x); > + res2.x = _mm_mask3_fmsub_ss (src2.x, src3.x, src1.x, mask); > + res3.x = _mm_maskz_fmsub_ss (mask, src1.x, src2.x, src3.x); > + res4.x = _mm_fmsub_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); > + res5.x = _mm_mask_fmsub_round_ss (src1.x, mask, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + res6.x = _mm_mask3_fmsub_round_ss (src2.x, src3.x, src1.x, mask, > _MM_FROUND_NO_EXC); > + res7.x = _mm_maskz_fmsub_round_ss (mask, src1.x, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + > + if (check_union128 (res4, res_ref1)) > + abort(); > + > + MASK_ZERO () (res_ref1, mask, 1); > + if (check_union128 (res3, res_ref1)) > + abort (); > + > + MASK_ZERO () (res_ref1, mask, 1); > + if (check_union128 (res7, res_ref1)) > + abort (); > + > + MASK_MERGE () (res_ref2, mask, 1); > + if (check_union128 (res2, 
res_ref2)) > + abort (); > + > + MASK_MERGE () (res_ref2, mask, 1); > + if (check_union128 (res6, res_ref2)) > + abort (); > + > + MASK_MERGE () (res_ref1, mask, 1); > + if (check_union128 (res1, res_ref1)) > + abort (); > + > + MASK_MERGE () (res_ref1, mask, 1); > + if (check_union128 (res5, res_ref1)) > + abort (); > +} > + > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c.jj 2016-05-22 > 12:20:21.273803080 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-1.c 2019-03-22 > 09:28:16.235206729 +0100 > @@ -1,13 +1,26 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfnmadd...sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > +/* { dg-final { scan-assembler-times "vfnmadd231sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmadd...sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ > /* { dg-final { scan-assembler-times "vfnmadd...sd\[ > \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmadd...sd\[ > \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmadd231sd\[ > \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmadd...sd\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ > \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > volatile __m128d a, b, c; > +volatile __mmask8 m; > > void extern > avx512f_test (void) > { > + a = _mm_mask_fnmadd_sd (a, m, b, c); > + c = _mm_mask3_fnmadd_sd (a, b, c, m); > + a = _mm_maskz_fnmadd_sd (m, a, b, c); > a = _mm_fnmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | > _MM_FROUND_NO_EXC); > + a = _mm_mask_fnmadd_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | > 
_MM_FROUND_NO_EXC); > + c = _mm_mask3_fnmadd_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | > _MM_FROUND_NO_EXC); > + a = _mm_maskz_fnmadd_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | > _MM_FROUND_NO_EXC); > } > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c.jj 2019-03-22 > 09:28:16.236206713 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-2.c 2019-03-22 > 09:28:16.236206713 +0100 > @@ -0,0 +1,94 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-check.h" > + > +#include "avx512f-helper.h" > + > +#define SIZE (128 / 64) > +#include "avx512f-mask-type.h" > + > +static void > +calc_add (double *s1, double *s2, double *s3, double* r) > +{ > + r[0] = -s1[0] * s2[0] + s3[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +static void > +calc_add_3 (double *s1, double *s2, double *s3, double* r) > +{ > + r[0] = -s2[0] * s3[0] + s1[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +void > +avx512f_test (void) > +{ > + int i, sign; > + union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; > + MASK_TYPE mask = MASK_VALUE; > + double res_ref1[SIZE], res_ref2[SIZE]; > + > + sign = -1; > + for (i = 0; i < SIZE; i++) > + { > + src1.a[i] = DEFAULT_VALUE; > + src2.a[i] = 56.78 * (i + 1) * sign; > + src3.a[i] = 90.12 * (i + 2) * sign; > + sign = sign * -1; > + } > + for (i = 0; i < SIZE; i++) > + { > + res1.a[i] = DEFAULT_VALUE; > + res2.a[i] = DEFAULT_VALUE; > + res5.a[i] = DEFAULT_VALUE; > + res6.a[i] = DEFAULT_VALUE; > + } > + > + calc_add (src1.a, src2.a, src3.a, res_ref1); > + calc_add_3(src1.a, src2.a, src3.a, res_ref2); > + > + res1.x = _mm_mask_fnmadd_sd (src1.x, mask, src2.x, src3.x); > + res2.x = _mm_mask3_fnmadd_sd (src2.x, src3.x, src1.x, mask); > + res3.x = _mm_maskz_fnmadd_sd (mask, src1.x, src2.x, src3.x); > + res4.x = _mm_fnmadd_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); 
> + res5.x = _mm_mask_fnmadd_round_sd (src1.x, mask, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + res6.x = _mm_mask3_fnmadd_round_sd (src2.x, src3.x, src1.x, mask, > _MM_FROUND_NO_EXC); > + res7.x = _mm_maskz_fnmadd_round_sd (mask, src1.x, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + > + if (check_union128d (res4, res_ref1)) > + abort(); > + > + MASK_ZERO (d) (res_ref1, mask, 1); > + if (check_union128d (res3, res_ref1)) > + abort (); > + > + MASK_ZERO (d) (res_ref1, mask, 1); > + if (check_union128d (res7, res_ref1)) > + abort (); > + > + MASK_MERGE (d) (res_ref2, mask, 1); > + if (check_union128d (res2, res_ref2)) > + abort (); > + > + MASK_MERGE (d) (res_ref2, mask, 1); > + if (check_union128d (res6, res_ref2)) > + abort (); > + > + MASK_MERGE (d) (res_ref1, mask, 1); > + if (check_union128d (res1, res_ref1)) > + abort (); > + > + MASK_MERGE (d) (res_ref1, mask, 1); > + if (check_union128d (res5, res_ref1)) > + abort (); > +} > + > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c.jj 2016-05-22 > 12:20:06.646001239 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-1.c 2019-03-22 > 09:28:16.260206318 +0100 > @@ -1,13 +1,26 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfnmadd...ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > +/* { dg-final { scan-assembler-times "vfnmadd231ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmadd...ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ > /* { dg-final { scan-assembler-times "vfnmadd...ss\[ > \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmadd...ss\[ > \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmadd231ss\[ > 
\\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmadd...ss\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ > \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > volatile __m128 a, b, c; > +volatile __mmask8 m; > > void extern > avx512f_test (void) > { > + a = _mm_mask_fnmadd_ss (a, m, b, c); > + c = _mm_mask3_fnmadd_ss (a, b, c, m); > + a = _mm_maskz_fnmadd_ss (m, a, b, c); > a = _mm_fnmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | > _MM_FROUND_NO_EXC); > + a = _mm_mask_fnmadd_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | > _MM_FROUND_NO_EXC); > + c = _mm_mask3_fnmadd_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | > _MM_FROUND_NO_EXC); > + a = _mm_maskz_fnmadd_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | > _MM_FROUND_NO_EXC); > } > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c.jj 2019-03-22 > 09:28:16.261206301 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c 2019-03-22 > 09:28:16.261206301 +0100 > @@ -0,0 +1,94 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-check.h" > + > +#include "avx512f-helper.h" > + > +#define SIZE (128 / 32) > +#include "avx512f-mask-type.h" > + > +static void > +calc_add (float *s1, float *s2, float *s3, float* r) > +{ > + r[0] = -s1[0] * s2[0] + s3[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +static void > +calc_add_3 (float *s1, float *s2, float *s3, float* r) > +{ > + r[0] = -s2[0] * s3[0] + s1[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +void > +avx512f_test (void) > +{ > + int i, sign; > + union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; > + MASK_TYPE mask = MASK_VALUE; > + float res_ref1[SIZE], res_ref2[SIZE]; > + > + sign = -1; > + for (i = 0; i < SIZE; i++) > + { > + src1.a[i] = DEFAULT_VALUE; > + src2.a[i] = 
56.78 * (i + 1) * sign; > + src3.a[i] = 90.12 * (i + 2) * sign; > + sign = sign * -1; > + } > + for (i = 0; i < SIZE; i++) > + { > + res1.a[i] = DEFAULT_VALUE; > + res2.a[i] = DEFAULT_VALUE; > + res5.a[i] = DEFAULT_VALUE; > + res6.a[i] = DEFAULT_VALUE; > + } > + > + calc_add (src1.a, src2.a, src3.a, res_ref1); > + calc_add_3(src1.a, src2.a, src3.a, res_ref2); > + > + res1.x = _mm_mask_fnmadd_ss (src1.x, mask, src2.x, src3.x); > + res2.x = _mm_mask3_fnmadd_ss (src2.x, src3.x, src1.x, mask); > + res3.x = _mm_maskz_fnmadd_ss (mask, src1.x, src2.x, src3.x); > + res4.x = _mm_fnmadd_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); > + res5.x = _mm_mask_fnmadd_round_ss (src1.x, mask, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + res6.x = _mm_mask3_fnmadd_round_ss (src2.x, src3.x, src1.x, mask, > _MM_FROUND_NO_EXC); > + res7.x = _mm_maskz_fnmadd_round_ss (mask, src1.x, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + > + if (check_union128 (res4, res_ref1)) > + abort(); > + > + MASK_ZERO () (res_ref1, mask, 1); > + if (check_union128 (res3, res_ref1)) > + abort (); > + > + MASK_ZERO () (res_ref1, mask, 1); > + if (check_union128 (res7, res_ref1)) > + abort (); > + > + MASK_MERGE () (res_ref2, mask, 1); > + if (check_union128 (res2, res_ref2)) > + abort (); > + > + MASK_MERGE () (res_ref2, mask, 1); > + if (check_union128 (res6, res_ref2)) > + abort (); > + > + MASK_MERGE () (res_ref1, mask, 1); > + if (check_union128 (res1, res_ref1)) > + abort (); > + > + MASK_MERGE () (res_ref1, mask, 1); > + if (check_union128 (res5, res_ref1)) > + abort (); > +} > + > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c.jj 2016-05-22 > 12:20:14.351896849 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-1.c 2019-03-22 > 09:28:16.269206170 +0100 > @@ -1,13 +1,26 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfnmsub...sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > +/* { dg-final { 
scan-assembler-times "vfnmsub231sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmsub...sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ > /* { dg-final { scan-assembler-times "vfnmsub...sd\[ > \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmsub...sd\[ > \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmsub231sd\[ > \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmsub...sd\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ > \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > volatile __m128d a, b, c; > +volatile __mmask8 m; > > void extern > avx512f_test (void) > { > + a = _mm_mask_fnmsub_sd (a, m, b, c); > + c = _mm_mask3_fnmsub_sd (a, b, c, m); > + a = _mm_maskz_fnmsub_sd (m, a, b, c); > a = _mm_fnmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT | > _MM_FROUND_NO_EXC); > + a = _mm_mask_fnmsub_round_sd (a, m, b, c, _MM_FROUND_TO_NEG_INF | > _MM_FROUND_NO_EXC); > + c = _mm_mask3_fnmsub_round_sd (a, b, c, m, _MM_FROUND_TO_POS_INF | > _MM_FROUND_NO_EXC); > + a = _mm_maskz_fnmsub_round_sd (m, a, b, c, _MM_FROUND_TO_ZERO | > _MM_FROUND_NO_EXC); > } > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c.jj 2019-03-22 > 09:28:16.269206170 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-2.c 2019-03-22 > 09:28:16.269206170 +0100 > @@ -0,0 +1,94 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-check.h" > + > +#include "avx512f-helper.h" > + > +#define SIZE (128 / 64) > +#include "avx512f-mask-type.h" > + > +static void > +calc_add (double *s1, double *s2, double *s3, double* r) > +{ > + r[0] = -s1[0] * 
s2[0] - s3[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +static void > +calc_add_3 (double *s1, double *s2, double *s3, double* r) > +{ > + r[0] = -s2[0] * s3[0] - s1[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +void > +avx512f_test (void) > +{ > + int i, sign; > + union128d res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; > + MASK_TYPE mask = MASK_VALUE; > + double res_ref1[SIZE], res_ref2[SIZE]; > + > + sign = -1; > + for (i = 0; i < SIZE; i++) > + { > + src1.a[i] = DEFAULT_VALUE; > + src2.a[i] = 56.78 * (i + 1) * sign; > + src3.a[i] = 90.12 * (i + 2) * sign; > + sign = sign * -1; > + } > + for (i = 0; i < SIZE; i++) > + { > + res1.a[i] = DEFAULT_VALUE; > + res2.a[i] = DEFAULT_VALUE; > + res5.a[i] = DEFAULT_VALUE; > + res6.a[i] = DEFAULT_VALUE; > + } > + > + calc_add (src1.a, src2.a, src3.a, res_ref1); > + calc_add_3(src1.a, src2.a, src3.a, res_ref2); > + > + res1.x = _mm_mask_fnmsub_sd (src1.x, mask, src2.x, src3.x); > + res2.x = _mm_mask3_fnmsub_sd (src2.x, src3.x, src1.x, mask); > + res3.x = _mm_maskz_fnmsub_sd (mask, src1.x, src2.x, src3.x); > + res4.x = _mm_fnmsub_round_sd (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); > + res5.x = _mm_mask_fnmsub_round_sd (src1.x, mask, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + res6.x = _mm_mask3_fnmsub_round_sd (src2.x, src3.x, src1.x, mask, > _MM_FROUND_NO_EXC); > + res7.x = _mm_maskz_fnmsub_round_sd (mask, src1.x, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + > + if (check_union128d (res4, res_ref1)) > + abort(); > + > + MASK_ZERO (d) (res_ref1, mask, 1); > + if (check_union128d (res3, res_ref1)) > + abort (); > + > + MASK_ZERO (d) (res_ref1, mask, 1); > + if (check_union128d (res7, res_ref1)) > + abort (); > + > + MASK_MERGE (d) (res_ref2, mask, 1); > + if (check_union128d (res2, res_ref2)) > + abort (); > + > + MASK_MERGE (d) (res_ref2, mask, 1); > + if (check_union128d (res6, res_ref2)) > + abort (); > + > + MASK_MERGE (d) (res_ref1, mask, 1); > 
+ if (check_union128d (res1, res_ref1)) > + abort (); > + > + MASK_MERGE (d) (res_ref1, mask, 1); > + if (check_union128d (res5, res_ref1)) > + abort (); > +} > + > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c.jj 2016-05-22 > 12:20:31.968658199 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-1.c 2019-03-22 > 09:28:16.284205923 +0100 > @@ -1,13 +1,26 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfnmsub...ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > +/* { dg-final { scan-assembler-times "vfnmsub231ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmsub...ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ > /* { dg-final { scan-assembler-times "vfnmsub...ss\[ > \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmsub...ss\[ > \\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmsub231ss\[ > \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vfnmsub...ss\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ > \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > volatile __m128 a, b, c; > +volatile __mmask8 m; > > void extern > avx512f_test (void) > { > + a = _mm_mask_fnmsub_ss (a, m, b, c); > + c = _mm_mask3_fnmsub_ss (a, b, c, m); > + a = _mm_maskz_fnmsub_ss (m, a, b, c); > a = _mm_fnmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT | > _MM_FROUND_NO_EXC); > + a = _mm_mask_fnmsub_round_ss (a, m, b, c, _MM_FROUND_TO_NEG_INF | > _MM_FROUND_NO_EXC); > + c = _mm_mask3_fnmsub_round_ss (a, b, c, m, _MM_FROUND_TO_POS_INF | > _MM_FROUND_NO_EXC); > + a = _mm_maskz_fnmsub_round_ss (m, a, b, c, _MM_FROUND_TO_ZERO | > 
_MM_FROUND_NO_EXC); > } > --- gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c.jj 2019-03-22 > 09:28:16.284205923 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-2.c 2019-03-22 > 09:28:16.284205923 +0100 > @@ -0,0 +1,94 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512f" } */ > +/* { dg-require-effective-target avx512f } */ > + > +#include "avx512f-check.h" > + > +#include "avx512f-helper.h" > + > +#define SIZE (128 / 32) > +#include "avx512f-mask-type.h" > + > +static void > +calc_add (float *s1, float *s2, float *s3, float* r) > +{ > + r[0] = -s1[0] * s2[0] - s3[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +static void > +calc_add_3 (float *s1, float *s2, float *s3, float* r) > +{ > + r[0] = -s2[0] * s3[0] - s1[0]; > + > + int i; > + for (i = 1; i < SIZE; i++) > + r[i] = s1[i]; > +} > + > +void > +avx512f_test (void) > +{ > + int i, sign; > + union128 res1, res2, res3, res4, res5, res6, res7, src1, src2, src3; > + MASK_TYPE mask = MASK_VALUE; > + float res_ref1[SIZE], res_ref2[SIZE]; > + > + sign = -1; > + for (i = 0; i < SIZE; i++) > + { > + src1.a[i] = DEFAULT_VALUE; > + src2.a[i] = 56.78 * (i + 1) * sign; > + src3.a[i] = 90.12 * (i + 2) * sign; > + sign = sign * -1; > + } > + for (i = 0; i < SIZE; i++) > + { > + res1.a[i] = DEFAULT_VALUE; > + res2.a[i] = DEFAULT_VALUE; > + res5.a[i] = DEFAULT_VALUE; > + res6.a[i] = DEFAULT_VALUE; > + } > + > + calc_add (src1.a, src2.a, src3.a, res_ref1); > + calc_add_3(src1.a, src2.a, src3.a, res_ref2); > + > + res1.x = _mm_mask_fnmsub_ss (src1.x, mask, src2.x, src3.x); > + res2.x = _mm_mask3_fnmsub_ss (src2.x, src3.x, src1.x, mask); > + res3.x = _mm_maskz_fnmsub_ss (mask, src1.x, src2.x, src3.x); > + res4.x = _mm_fnmsub_round_ss (src1.x, src2.x, src3.x, _MM_FROUND_NO_EXC); > + res5.x = _mm_mask_fnmsub_round_ss (src1.x, mask, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + res6.x = _mm_mask3_fnmsub_round_ss (src2.x, src3.x, src1.x, mask, > _MM_FROUND_NO_EXC); > + res7.x 
= _mm_maskz_fnmsub_round_ss (mask, src1.x, src2.x, src3.x, > _MM_FROUND_NO_EXC); > + > + if (check_union128 (res4, res_ref1)) > + abort(); > + > + MASK_ZERO () (res_ref1, mask, 1); > + if (check_union128 (res3, res_ref1)) > + abort (); > + > + MASK_ZERO () (res_ref1, mask, 1); > + if (check_union128 (res7, res_ref1)) > + abort (); > + > + MASK_MERGE () (res_ref2, mask, 1); > + if (check_union128 (res2, res_ref2)) > + abort (); > + > + MASK_MERGE () (res_ref2, mask, 1); > + if (check_union128 (res6, res_ref2)) > + abort (); > + > + MASK_MERGE () (res_ref1, mask, 1); > + if (check_union128 (res1, res_ref1)) > + abort (); > + > + MASK_MERGE () (res_ref1, mask, 1); > + if (check_union128 (res5, res_ref1)) > + abort (); > +} > + > > Jakub