On Sat, Oct 20, 2018 at 8:46 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > Many AVX512 vector operations can broadcast from a scalar memory source. > This patch enables memory broadcast for FMSUB operations. In order to > support AVX512 memory broadcast for FMSUB, FMSUB builtin functions are > also added, instead of passing the negated value to FMA builtin functions. > > gcc/ > > PR target/72782 > * config/i386/avx512fintrin.h (_mm512_fmsub_round_pd): Use > __builtin_ia32_vfmsubpd512_mask. > (_mm512_mask_fmsub_round_pd): Likewise. > (_mm512_fmsub_pd): Likewise. > (_mm512_mask_fmsub_pd): Likewise. > (_mm512_maskz_fmsub_round_pd): Use > __builtin_ia32_vfmsubpd512_maskz. > (_mm512_maskz_fmsub_pd): Likewise. > (_mm512_fmsub_round_ps): Use __builtin_ia32_vfmsubps512_mask. > (_mm512_mask_fmsub_round_ps): Likewise. > (_mm512_fmsub_ps): Likewise. > (_mm512_mask_fmsub_ps): Likewise. > (_mm512_maskz_fmsub_round_ps): Use > __builtin_ia32_vfmsubps512_maskz. > (_mm512_maskz_fmsub_ps): Likewise. > * config/i386/avx512vlintrin.h (_mm256_mask_fmsub_pd): Use > __builtin_ia32_vfmsubpd256_mask. > (_mm256_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd256_maskz. > (_mm_mask_fmsub_pd): Use __builtin_ia32_vfmsubpd128_mask. > (_mm_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd128_maskz. > (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask. > (_mm256_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps256_maskz. > (_mm_mask_fmsub_ps): Use __builtin_ia32_vfmsubps128_mask. > (_mm_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps128_maskz. > * config/i386/fmaintrin.h (_mm_fmsub_pd): Use > __builtin_ia32_vfmsubpd. > (_mm256_fmsub_pd): Use __builtin_ia32_vfmsubpd256. > (_mm_fmsub_ps): Use __builtin_ia32_vfmsubps. > (_mm256_fmsub_ps): Use __builtin_ia32_vfmsubps256. > (_mm_fmsub_sd): Use __builtin_ia32_vfmsubsd3. > (_mm_fmsub_ss): Use __builtin_ia32_vfmsubss3. 
> * config/i386/i386-builtin.def: Add > __builtin_ia32_vfmsubpd256_mask, > __builtin_ia32_vfmsubpd256_maskz, > __builtin_ia32_vfmsubpd128_mask, > __builtin_ia32_vfmsubpd128_maskz, > __builtin_ia32_vfmsubps256_mask, > __builtin_ia32_vfmsubps256_maskz, > __builtin_ia32_vfmsubps128_mask, > __builtin_ia32_vfmsubps128_maskz, > __builtin_ia32_vfmsubpd512_mask, > __builtin_ia32_vfmsubpd512_maskz, > __builtin_ia32_vfmsubps512_mask, > __builtin_ia32_vfmsubps512_maskz, __builtin_ia32_vfmsubss3, > __builtin_ia32_vfmsubsd3, __builtin_ia32_vfmsubps, > __builtin_ia32_vfmsubpd, __builtin_ia32_vfmsubps256 and > __builtin_ia32_vfmsubpd256. > * config/i386/sse.md (fma4i_fmsub_<mode>): New. > (<avx512>_fmsub_<mode>_maskz<round_expand_name>): Likewise. > (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1): > Likewise. > (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2): > Likewise. > (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3): > Likewise. > (fmai_vmfmsub_<mode><round_name>): Likewise. > > gcc/testsuite/ > > PR target/72782 > * gcc.target/i386/avx512f-fmsub-df-zmm-1.c: New test. > * gcc.target/i386/avx512f-fmsub-sf-zmm-1.c: Likewise. > * gcc.target/i386/avx512f-fmsub-sf-zmm-2.c: Likewise. > * gcc.target/i386/avx512f-fmsub-sf-zmm-3.c: Likewise. > * gcc.target/i386/avx512f-fmsub-sf-zmm-4.c: Likewise. > * gcc.target/i386/avx512f-fmsub-sf-zmm-5.c: Likewise. > * gcc.target/i386/avx512f-fmsub-sf-zmm-6.c: Likewise. > * gcc.target/i386/avx512f-fmsub-sf-zmm-7.c: Likewise. > * gcc.target/i386/avx512f-fmsub-sf-zmm-8.c: Likewise. > * gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c: Likewise. > * gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c: Likewise.
LGTM. Thanks, Uros. > --- > gcc/config/i386/avx512fintrin.h | 60 +++++++-------- > gcc/config/i386/avx512vlintrin.h | 32 ++++---- > gcc/config/i386/fmaintrin.h | 24 +++--- > gcc/config/i386/i386-builtin.def | 18 +++++ > gcc/config/i386/sse.md | 77 +++++++++++++++++++ > .../gcc.target/i386/avx512f-fmsub-df-zmm-1.c | 12 +++ > .../gcc.target/i386/avx512f-fmsub-sf-zmm-1.c | 12 +++ > .../gcc.target/i386/avx512f-fmsub-sf-zmm-2.c | 12 +++ > .../gcc.target/i386/avx512f-fmsub-sf-zmm-3.c | 12 +++ > .../gcc.target/i386/avx512f-fmsub-sf-zmm-4.c | 12 +++ > .../gcc.target/i386/avx512f-fmsub-sf-zmm-5.c | 12 +++ > .../gcc.target/i386/avx512f-fmsub-sf-zmm-6.c | 12 +++ > .../gcc.target/i386/avx512f-fmsub-sf-zmm-7.c | 12 +++ > .../gcc.target/i386/avx512f-fmsub-sf-zmm-8.c | 12 +++ > .../gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c | 12 +++ > .../gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c | 12 +++ > 16 files changed, 285 insertions(+), 58 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c > > diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h > index 8473cd0d26c..c0c8fa1efd0 100644 > --- a/gcc/config/i386/avx512fintrin.h > +++ b/gcc/config/i386/avx512fintrin.h > @@ -3355,9 
+3355,9 @@ extern __inline __m512d > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) > { > - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, > + return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A, > (__v8df) __B, > - -(__v8df) __C, > + (__v8df) __C, > (__mmask8) -1, __R); > } > > @@ -3366,9 +3366,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, > __artificial__)) > _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, > __m512d __C, const int __R) > { > - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, > + return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A, > (__v8df) __B, > - -(__v8df) __C, > + (__v8df) __C, > (__mmask8) __U, __R); > } > > @@ -3388,9 +3388,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, > __artificial__)) > _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, > __m512d __C, const int __R) > { > - return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, > + return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A, > (__v8df) __B, > - -(__v8df) __C, > + (__v8df) __C, > (__mmask8) __U, __R); > } > > @@ -3398,9 +3398,9 @@ extern __inline __m512 > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) > { > - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, > + return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A, > (__v16sf) __B, > - -(__v16sf) __C, > + (__v16sf) __C, > (__mmask16) -1, __R); > } > > @@ -3409,9 +3409,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, > __artificial__)) > _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, > __m512 __C, const int __R) > { > - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, > + return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A, > 
(__v16sf) __B, > - -(__v16sf) __C, > + (__v16sf) __C, > (__mmask16) __U, __R); > } > > @@ -3431,9 +3431,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, > __artificial__)) > _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, > __m512 __C, const int __R) > { > - return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, > + return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A, > (__v16sf) __B, > - -(__v16sf) __C, > + (__v16sf) __C, > (__mmask16) __U, __R); > } > > @@ -3806,28 +3806,28 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 > __A, __m512 __B, > (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R) > > #define _mm512_fmsub_round_pd(A, B, C, R) \ > - (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R) > + (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, -1, R) > > #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ > - (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R) > + (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, U, R) > > #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ > (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R) > > #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ > - (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R) > + (__m512d)__builtin_ia32_vfmsubpd512_maskz(A, B, C, U, R) > > #define _mm512_fmsub_round_ps(A, B, C, R) \ > - (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R) > + (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, -1, R) > > #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ > - (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R) > + (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, U, R) > > #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ > (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R) > > #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ > - (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R) > + (__m512)__builtin_ia32_vfmsubps512_maskz(A, B, C, U, R) > > #define _mm512_fmaddsub_round_pd(A, B, 
C, R) \ > (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R) > @@ -12416,9 +12416,9 @@ extern __inline __m512d > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C) > { > - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, > + return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A, > (__v8df) __B, > - -(__v8df) __C, > + (__v8df) __C, > (__mmask8) -1, > _MM_FROUND_CUR_DIRECTION); > } > @@ -12427,9 +12427,9 @@ extern __inline __m512d > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) > { > - return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, > + return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A, > (__v8df) __B, > - -(__v8df) __C, > + (__v8df) __C, > (__mmask8) __U, > _MM_FROUND_CUR_DIRECTION); > } > @@ -12449,9 +12449,9 @@ extern __inline __m512d > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) > { > - return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, > + return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A, > (__v8df) __B, > - -(__v8df) __C, > + (__v8df) __C, > (__mmask8) __U, > > _MM_FROUND_CUR_DIRECTION); > } > @@ -12460,9 +12460,9 @@ extern __inline __m512 > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C) > { > - return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, > + return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A, > (__v16sf) __B, > - -(__v16sf) __C, > + (__v16sf) __C, > (__mmask16) -1, > _MM_FROUND_CUR_DIRECTION); > } > @@ -12471,9 +12471,9 @@ extern __inline __m512 > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) > { > 
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, > + return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A, > (__v16sf) __B, > - -(__v16sf) __C, > + (__v16sf) __C, > (__mmask16) __U, > _MM_FROUND_CUR_DIRECTION); > } > @@ -12493,9 +12493,9 @@ extern __inline __m512 > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) > { > - return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, > + return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A, > (__v16sf) __B, > - -(__v16sf) __C, > + (__v16sf) __C, > (__mmask16) __U, > _MM_FROUND_CUR_DIRECTION); > } > diff --git a/gcc/config/i386/avx512vlintrin.h > b/gcc/config/i386/avx512vlintrin.h > index 68b5537845b..1e2e6da29bd 100644 > --- a/gcc/config/i386/avx512vlintrin.h > +++ b/gcc/config/i386/avx512vlintrin.h > @@ -4117,9 +4117,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, > __artificial__)) > _mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B, > __m256d __C) > { > - return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, > + return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A, > (__v4df) __B, > - -(__v4df) __C, > + (__v4df) __C, > (__mmask8) __U); > } > > @@ -4139,9 +4139,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, > __artificial__)) > _mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B, > __m256d __C) > { > - return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, > + return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A, > (__v4df) __B, > - -(__v4df) __C, > + (__v4df) __C, > (__mmask8) __U); > } > > @@ -4149,9 +4149,9 @@ extern __inline __m128d > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) > { > - return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, > + return (__m128d) __builtin_ia32_vfmsubpd128_mask 
((__v2df) __A, > (__v2df) __B, > - -(__v2df) __C, > + (__v2df) __C, > (__mmask8) __U); > } > > @@ -4171,9 +4171,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, > __artificial__)) > _mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B, > __m128d __C) > { > - return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, > + return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A, > (__v2df) __B, > - -(__v2df) __C, > + (__v2df) __C, > (__mmask8) __U); > } > > @@ -4181,9 +4181,9 @@ extern __inline __m256 > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) > { > - return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, > + return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A, > (__v8sf) __B, > - -(__v8sf) __C, > + (__v8sf) __C, > (__mmask8) __U); > } > > @@ -4203,9 +4203,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, > __artificial__)) > _mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B, > __m256 __C) > { > - return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, > + return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A, > (__v8sf) __B, > - -(__v8sf) __C, > + (__v8sf) __C, > (__mmask8) __U); > } > > @@ -4213,9 +4213,9 @@ extern __inline __m128 > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) > { > - return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, > + return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A, > (__v4sf) __B, > - -(__v4sf) __C, > + (__v4sf) __C, > (__mmask8) __U); > } > > @@ -4233,9 +4233,9 @@ extern __inline __m128 > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) > { > - return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, > + return (__m128) __builtin_ia32_vfmsubps128_maskz 
((__v4sf) __A, > (__v4sf) __B, > - -(__v4sf) __C, > + (__v4sf) __C, > (__mmask8) __U); > } > > diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h > index 660d3453590..2eddd896579 100644 > --- a/gcc/config/i386/fmaintrin.h > +++ b/gcc/config/i386/fmaintrin.h > @@ -86,48 +86,48 @@ extern __inline __m128d > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C) > { > - return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, > - -(__v2df)__C); > + return (__m128d)__builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B, > + (__v2df)__C); > } > > extern __inline __m256d > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C) > { > - return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, > - -(__v4df)__C); > + return (__m256d)__builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B, > + (__v4df)__C); > } > > extern __inline __m128 > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C) > { > - return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, > - -(__v4sf)__C); > + return (__m128)__builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B, > + (__v4sf)__C); > } > > extern __inline __m256 > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C) > { > - return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, > - -(__v8sf)__C); > + return (__m256)__builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B, > + (__v8sf)__C); > } > > extern __inline __m128d > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C) > { > - return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B, > - -(__v2df)__C); > + return (__m128d)__builtin_ia32_vfmsubsd3 ((__v2df)__A, (__v2df)__B, > + (__v2df)__C); > } 
> > extern __inline __m128 > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C) > { > - return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B, > - -(__v4sf)__C); > + return (__m128)__builtin_ia32_vfmsubss3 ((__v4sf)__A, (__v4sf)__B, > + (__v4sf)__C); > } > > extern __inline __m128d > diff --git a/gcc/config/i386/i386-builtin.def > b/gcc/config/i386/i386-builtin.def > index dc4c70c7ea3..f5b5e56a01c 100644 > --- a/gcc/config/i386/i386-builtin.def > +++ b/gcc/config/i386/i386-builtin.def > @@ -1903,10 +1903,18 @@ BDESC (OPTION_MASK_ISA_AVX512VL, > CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ > BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, > "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, > (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) > BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, > "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, > (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) > BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, > "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, > (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask, > "__builtin_ia32_vfmsubpd256_mask", IX86_BUILTIN_VFMSUBPD256_MASK, UNKNOWN, > (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) > BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, > "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, > (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_maskz, > "__builtin_ia32_vfmsubpd256_maskz", IX86_BUILTIN_VFMSUBPD256_MASKZ, UNKNOWN, > (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask, > "__builtin_ia32_vfmsubpd128_mask", IX86_BUILTIN_VFMSUBPD128_MASK, UNKNOWN, > (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) > 
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, > "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, > (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_maskz, > "__builtin_ia32_vfmsubpd128_maskz", IX86_BUILTIN_VFMSUBPD128_MASKZ, UNKNOWN, > (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask, > "__builtin_ia32_vfmsubps256_mask", IX86_BUILTIN_VFMSUBPS256_MASK, UNKNOWN, > (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) > BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, > "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, > (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_maskz, > "__builtin_ia32_vfmsubps256_maskz", IX86_BUILTIN_VFMSUBPS256_MASKZ, UNKNOWN, > (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask, > "__builtin_ia32_vfmsubps128_mask", IX86_BUILTIN_VFMSUBPS128_MASK, UNKNOWN, > (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) > BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, > "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, > (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) > +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_maskz, > "__builtin_ia32_vfmsubps128_maskz", IX86_BUILTIN_VFMSUBPS128_MASKZ, UNKNOWN, > (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) > BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, > "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, > (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) > BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, > "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, > (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) > BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, > "__builtin_ia32_vfnmaddps256_mask", 
IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, > (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) > @@ -2768,8 +2776,12 @@ BDESC (OPTION_MASK_ISA_AVX512F, > CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__ > BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, > "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, > UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, > "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, > UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, > "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, > UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask_round, > "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, > (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, > "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, > (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_maskz_round, > "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, > (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask_round, > "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, > (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, > "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, > (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) > +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, > "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, > (int) 
V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, > "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, > (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, > "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, > (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) > BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, > "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, > (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) > @@ -2855,11 +2867,17 @@ BDESC_FIRST (multi_arg, MULTI_ARG, > BDESC (OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, > "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, > (int)MULTI_ARG_3_DF) > BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, > "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, > (int)MULTI_ARG_3_SF) > BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, > "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, > (int)MULTI_ARG_3_DF) > +BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v4sf, > "__builtin_ia32_vfmsubss3", IX86_BUILTIN_VFMSUBSS3, UNKNOWN, > (int)MULTI_ARG_3_SF) > +BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v2df, > "__builtin_ia32_vfmsubsd3", IX86_BUILTIN_VFMSUBSD3, UNKNOWN, > (int)MULTI_ARG_3_DF) > > BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, > CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, > UNKNOWN, (int)MULTI_ARG_3_SF) > BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, > CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, > UNKNOWN, (int)MULTI_ARG_3_DF) > BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, > CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", > IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2) > BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, > CODE_FOR_fma4i_fmadd_v4df, 
"__builtin_ia32_vfmaddpd256", > IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2) > +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, > CODE_FOR_fma4i_fmsub_v4sf, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, > UNKNOWN, (int)MULTI_ARG_3_SF) > +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, > CODE_FOR_fma4i_fmsub_v2df, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, > UNKNOWN, (int)MULTI_ARG_3_DF) > +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, > CODE_FOR_fma4i_fmsub_v8sf, "__builtin_ia32_vfmsubps256", > IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2) > +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, > CODE_FOR_fma4i_fmsub_v4df, "__builtin_ia32_vfmsubpd256", > IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2) > > BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, > "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, > (int)MULTI_ARG_3_SF) > BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, > "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, > (int)MULTI_ARG_3_DF) > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 635a6902d33..0e9d3541ccc 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -3760,6 +3760,14 @@ > (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand") > (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))]) > > +(define_expand "fma4i_fmsub_<mode>" > + [(set (match_operand:FMAMODE_AVX512 0 "register_operand") > + (fma:FMAMODE_AVX512 > + (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand") > + (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand") > + (neg:FMAMODE_AVX512 > + (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))]) > + > (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>" > [(match_operand:VF_AVX512VL 0 "register_operand") > (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") > @@ -3898,6 +3906,20 @@ > (set_attr "type" "ssemuladd") > (set_attr 
"mode" "<MODE>")]) > > +(define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>" > + [(match_operand:VF_AVX512VL 0 "register_operand") > + (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") > + (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") > + (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") > + (match_operand:<avx512fmaskmode> 4 "register_operand")] > + "TARGET_AVX512F && <round_mode512bit_condition>" > +{ > + emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> ( > + operands[0], operands[1], operands[2], operands[3], > + CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); > + DONE; > +}) > + > (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>" > [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") > (fma:VF_SF_AVX512VL > @@ -3913,6 +3935,49 @@ > [(set_attr "type" "ssemuladd") > (set_attr "mode" "<MODE>")]) > > +(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1" > + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") > + (fma:VF_AVX512 > + (match_operand:VF_AVX512 1 "register_operand" "0,v") > + (match_operand:VF_AVX512 2 "register_operand" "v,0") > + (neg:VF_AVX512 > + (vec_duplicate:VF_AVX512 > + (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))] > + "TARGET_AVX512F && <sd_mask_mode512bit_condition>" > + "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, > %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2" > + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") > + (fma:VF_AVX512 > + (vec_duplicate:VF_AVX512 > + (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")) > + (match_operand:VF_AVX512 2 "register_operand" "0,v") > + (neg:VF_AVX512 > + (match_operand:VF_AVX512 3 "register_operand" "v,0"))))] > + "TARGET_AVX512F && <sd_mask_mode512bit_condition>" > + "@ > + 
vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, > %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>} > + vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, > %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3" > + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") > + (fma:VF_AVX512 > + (match_operand:VF_AVX512 1 "register_operand" "0,v") > + (vec_duplicate:VF_AVX512 > + (match_operand:<ssescalarmode> 2 "memory_operand" "m,m")) > + (neg:VF_AVX512 > + (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0"))))] > + "TARGET_AVX512F && <sd_mask_mode512bit_condition>" > + "@ > + vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, > %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>} > + vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, > %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}" > + [(set_attr "type" "ssemuladd") > + (set_attr "mode" "<MODE>")]) > + > (define_insn "<avx512>_fmsub_<mode>_mask<round_name>" > [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") > (vec_merge:VF_AVX512VL > @@ -4261,6 +4326,18 @@ > (const_int 1)))] > "TARGET_FMA") > > +(define_expand "fmai_vmfmsub_<mode><round_name>" > + [(set (match_operand:VF_128 0 "register_operand") > + (vec_merge:VF_128 > + (fma:VF_128 > + (match_operand:VF_128 1 "<round_nimm_predicate>") > + (match_operand:VF_128 2 "<round_nimm_predicate>") > + (neg:VF_128 > + (match_operand:VF_128 3 "<round_nimm_predicate>"))) > + (match_dup 1) > + (const_int 1)))] > + "TARGET_FMA") > + > (define_insn "*fmai_fmadd_<mode>" > [(set (match_operand:VF_128 0 "register_operand" "=v,v") > (vec_merge:VF_128 > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c > b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c > new file mode 100644 > index 00000000000..840888a2d81 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c > @@ -0,0 
+1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...pd\[ > \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */ > + > +#define type __m512d > +#define vec 512 > +#define op fmsub > +#define suffix pd > +#define SCALAR double > + > +#include "avx512-fma-1.h" > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c > b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c > new file mode 100644 > index 00000000000..0cb675b7628 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...ps\[ > \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ > + > +#define type __m512 > +#define vec 512 > +#define op fmsub > +#define suffix ps > +#define SCALAR float > + > +#include "avx512-fma-1.h" > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c > b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c > new file mode 100644 > index 00000000000..10212d471b1 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...ps\[ > \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ > + > +#define type __m512 > +#define vec 512 > +#define op fmsub > +#define suffix ps > +#define SCALAR float > + > +#include "avx512-fma-2.h" > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c > b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c > new file 
mode 100644 > index 00000000000..feb34077085 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...ps\[ > \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ > + > +#define type __m512 > +#define vec 512 > +#define op fmsub > +#define suffix ps > +#define SCALAR float > + > +#include "avx512-fma-3.h" > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c > b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c > new file mode 100644 > index 00000000000..4305fffe628 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...ps\[ > \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ > + > +#define type __m512 > +#define vec 512 > +#define op fmsub > +#define suffix ps > +#define SCALAR float > + > +#include "avx512-fma-4.h" > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c > b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c > new file mode 100644 > index 00000000000..d57251f83be > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...ps\[ > \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ > + > +#define type __m512 > +#define vec 512 > +#define op fmsub > +#define suffix ps > +#define SCALAR float > + > +#include "avx512-fma-5.h" > diff 
--git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c > b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c > new file mode 100644 > index 00000000000..b26a9ee7eeb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...ps\[ > \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ > + > +#define type __m512 > +#define vec 512 > +#define op fmsub > +#define suffix ps > +#define SCALAR float > + > +#include "avx512-fma-6.h" > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c > b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c > new file mode 100644 > index 00000000000..cc705af8ea5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } > } */ > +/* { dg-final { scan-assembler-times "vfmsub...ps\[^\n\]*%zmm\[0-9\]+" 1 } } > */ > + > +#define type __m512 > +#define vec 512 > +#define op fmsub > +#define suffix ps > +#define SCALAR float > + > +#include "avx512-fma-7.h" > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c > b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c > new file mode 100644 > index 00000000000..2b929fa11e8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-mavx512f -O2" } */ > +/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } > } */ > +/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+%zmm\[0-9\]+, > %zmm\[0-9\]+, %zmm0" 1 } } */ > + > +#define type __m512 > +#define vec 512 > +#define op fmsub > +#define suffix ps > +#define SCALAR float > + > +#include "avx512-fma-8.h" > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c > b/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c > new file mode 100644 > index 00000000000..70efbcc98f9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mfma -mavx512vl -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...ps\[ > \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %xmm\[0-9\]+, %xmm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%xmm\[0-9\]+" } } */ > + > +#define type __m128 > +#define vec > +#define op fmsub > +#define suffix ps > +#define SCALAR float > + > +#include "avx512-fma-1.h" > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c > b/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c > new file mode 100644 > index 00000000000..a7c1b370b74 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mfma -mavx512vl -O2" } */ > +/* { dg-final { scan-assembler-times "vfmsub...ps\[ > \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %ymm\[0-9\]+, %ymm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%ymm\[0-9\]+" } } */ > + > +#define type __m256 > +#define vec 256 > +#define op fmsub > +#define suffix ps > +#define SCALAR float > + > +#include "avx512-fma-1.h" > -- > 2.17.2 >