On 10/18/18, Uros Bizjak <ubiz...@gmail.com> wrote: > On Thu, Oct 18, 2018 at 11:11 AM H.J. Lu <hjl.to...@gmail.com> wrote: >> >> Many AVX512 vector operations can broadcast from a scalar memory source. >> This patch enables memory broadcast for FMA operations. >> >> gcc/ >> >> PR target/72782 >> * config/i386/sse.md (VF_AVX512): New. >> (avx512bcst): Likewise. >> (*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1): >> Likewise. >> (*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2): >> Likewise. >> (*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3): >> Likewise. >> >> gcc/testsuite/ >> >> PR target/72782 >> * gcc.target/i386/avx512-fma-1.h: New file. >> * gcc.target/i386/avx512-fma-2.h: Likewise. >> * gcc.target/i386/avx512-fma-3.h: Likewise. >> * gcc.target/i386/avx512-fma-4.h: Likewise. >> * gcc.target/i386/avx512-fma-5.h: Likewise. >> * gcc.target/i386/avx512-fma-6.h: Likewise. >> * gcc.target/i386/avx512-fma-7.h: Likewise. >> * gcc.target/i386/avx512f-fmadd-df-zmm-1.c: Likewise. >> * gcc.target/i386/avx512f-fmadd-sf-zmm-1.c: Likewise. >> * gcc.target/i386/avx512f-fmadd-sf-zmm-2.c: Likewise. >> * gcc.target/i386/avx512f-fmadd-sf-zmm-3.c: Likewise. >> * gcc.target/i386/avx512f-fmadd-sf-zmm-4.c: Likewise. >> * gcc.target/i386/avx512f-fmadd-sf-zmm-5.c: Likewise. >> * gcc.target/i386/avx512f-fmadd-sf-zmm-6.c: Likewise. >> * gcc.target/i386/avx512f-fmadd-sf-zmm-7.c: Likewise. >> * gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c: Likewise. >> * gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c: Likewise. 
>> --- >> gcc/config/i386/sse.md | 50 +++++++++++++++++++ >> gcc/testsuite/gcc.target/i386/avx512-fma-1.h | 12 +++++ >> gcc/testsuite/gcc.target/i386/avx512-fma-2.h | 13 +++++ >> gcc/testsuite/gcc.target/i386/avx512-fma-3.h | 13 +++++ >> gcc/testsuite/gcc.target/i386/avx512-fma-4.h | 13 +++++ >> gcc/testsuite/gcc.target/i386/avx512-fma-5.h | 13 +++++ >> gcc/testsuite/gcc.target/i386/avx512-fma-6.h | 13 +++++ >> gcc/testsuite/gcc.target/i386/avx512-fma-7.h | 13 +++++ >> .../gcc.target/i386/avx512f-fmadd-df-zmm-1.c | 12 +++++ >> .../gcc.target/i386/avx512f-fmadd-sf-zmm-1.c | 12 +++++ >> .../gcc.target/i386/avx512f-fmadd-sf-zmm-2.c | 12 +++++ >> .../gcc.target/i386/avx512f-fmadd-sf-zmm-3.c | 12 +++++ >> .../gcc.target/i386/avx512f-fmadd-sf-zmm-4.c | 12 +++++ >> .../gcc.target/i386/avx512f-fmadd-sf-zmm-5.c | 12 +++++ >> .../gcc.target/i386/avx512f-fmadd-sf-zmm-6.c | 12 +++++ >> .../gcc.target/i386/avx512f-fmadd-sf-zmm-7.c | 11 ++++ >> .../gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c | 12 +++++ >> .../gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c | 12 +++++ >> 18 files changed, 259 insertions(+) >> create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-1.h >> create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-2.h >> create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-3.h >> create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-4.h >> create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-5.h >> create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-6.h >> create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-7.h >> create mode 100644 >> gcc/testsuite/gcc.target/i386/avx512f-fmadd-df-zmm-1.c >> create mode 100644 >> gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-1.c >> create mode 100644 >> gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-2.c >> create mode 100644 >> gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-3.c >> create mode 100644 >> gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-4.c >> create mode 100644 >> 
gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-5.c >> create mode 100644 >> gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-6.c >> create mode 100644 >> gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-7.c >> create mode 100644 >> gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c >> create mode 100644 >> gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c >> >> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md >> index 13dc7370fd3..594975a8b80 100644 >> --- a/gcc/config/i386/sse.md >> +++ b/gcc/config/i386/sse.md >> @@ -654,6 +654,16 @@ >> (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) >> (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF]) >> >> +(define_mode_iterator VF_AVX512 >> + [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL") >> + (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") >> + (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")]) > > No need for TARGET_AVX512F conditions, since TARGET_AVX512F is > baseline for these modes and is expressed in insn condition.
Fixed. >> +(define_mode_attr avx512bcst >> + [(V4SF "%{1to4%}") (V2DF "%{1to2%}") >> + (V8SF "%{1to8%}") (V4DF "%{1to4%}") >> + (V16SF "%{1to16%}") (V8DF "%{1to8%}")]) >> + >> ;; Mapping from float mode to required SSE level >> (define_mode_attr sse >> [(SF "sse") (DF "sse2") >> @@ -3740,6 +3750,46 @@ >> [(set_attr "type" "ssemuladd") >> (set_attr "mode" "<MODE>")]) >> >> +(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1" >> + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") >> + (fma:VF_AVX512 >> + (match_operand:VF_AVX512 1 "nonimmediate_operand" "0,v") >> + (match_operand:VF_AVX512 2 "nonimmediate_operand" "v,0") >> + (vec_duplicate:VF_AVX512 >> + (match_operand:<ssescalarmode> 3 "nonimmediate_operand" >> "m,m"))))] > > Please note that having a "nonimmediate_operand" predicate with an "m" > constraint will force a scalar value that lives in any register to > memory. So, the scalar value will be pushed from either an integer or SSE > register to memory, and will be broadcast to the SSE register from there. I > guess this is not the optimal way, and we still want (eventual movq > from integer reg) + broadcast insn in this case. > > If this predicate is changed to "memory_operand", then only scalars > that live in memory will be considered. 
Using "memory_operand" causes: FAIL: gcc.target/i386/avx512f-fmadd-sf-zmm-7.c scan-assembler-times vfmadd...ps[ \\t]+[^\n\r]+\\{1to[1-8]+\\}, %zmm[0-9]+, %zmm0 1 FAIL: gcc.target/i386/avx512f-fmadd-sf-zmm-7.c scan-assembler-not vbroadcastss[^\n]*%zmm[0-9]+ __m512 foo (__m512 x, __m512 y) { return _mm512_fmadd_ps (x, y, _mm512_set1_ps (2.f)); } Combiner: Failed to match this instruction: (set (reg:V16SF 91) (fma:V16SF (reg/v:V16SF 85 [ x ]) (reg:V16SF 21 xmm1 [ y ]) (vec_duplicate:V16SF (reg:SF 88)))) With "nonimmediate_operand": __m512 foo (__m512 x, __m512 y, float f) { return _mm512_fmadd_ps (x, y, _mm512_set1_ps (f)); } Combiner: Successfully matched this instruction: (set (reg:V16SF 92) (fma:V16SF (reg/v:V16SF 85 [ x ]) (reg:V16SF 21 xmm1 [ y ]) (vec_duplicate:V16SF (reg:SF 22 xmm2 [ f ])))) Instruction not appropriate for target. starting the processing of deferred insns. That is, ix86_legitimate_combined_insn rejects such an insn. I added a testcase for this (gcc.target/i386/avx512f-fmadd-sf-zmm-8.c). -- H.J.
From 50a8408f0903620e2ed28faf02bad09f00132471 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Tue, 2 Oct 2018 12:34:40 -0700 Subject: [PATCH] i386: Enable AVX512 memory broadcast for FMA Many AVX512 vector operations can broadcast from a scalar memory source. This patch enables memory broadcast for FMA operations. gcc/ PR target/72782 * config/i386/sse.md (VF_AVX512): New. (avx512bcst): Likewise. (*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1): Likewise. (*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2): Likewise. (*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3): Likewise. gcc/testsuite/ PR target/72782 * gcc.target/i386/avx512-fma-1.h: New file. * gcc.target/i386/avx512-fma-2.h: Likewise. * gcc.target/i386/avx512-fma-3.h: Likewise. * gcc.target/i386/avx512-fma-4.h: Likewise. * gcc.target/i386/avx512-fma-5.h: Likewise. * gcc.target/i386/avx512-fma-6.h: Likewise. * gcc.target/i386/avx512-fma-7.h: Likewise. * gcc.target/i386/avx512-fma-8.h: Likewise. * gcc.target/i386/avx512f-fmadd-df-zmm-1.c: Likewise. * gcc.target/i386/avx512f-fmadd-sf-zmm-1.c: Likewise. * gcc.target/i386/avx512f-fmadd-sf-zmm-2.c: Likewise. * gcc.target/i386/avx512f-fmadd-sf-zmm-3.c: Likewise. * gcc.target/i386/avx512f-fmadd-sf-zmm-4.c: Likewise. * gcc.target/i386/avx512f-fmadd-sf-zmm-5.c: Likewise. * gcc.target/i386/avx512f-fmadd-sf-zmm-6.c: Likewise. * gcc.target/i386/avx512f-fmadd-sf-zmm-7.c: Likewise. * gcc.target/i386/avx512f-fmadd-sf-zmm-8.c: Likewise. * gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c: Likewise. * gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c: Likewise. 
--- gcc/config/i386/sse.md | 50 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/avx512-fma-1.h | 12 +++++ gcc/testsuite/gcc.target/i386/avx512-fma-2.h | 13 +++++ gcc/testsuite/gcc.target/i386/avx512-fma-3.h | 13 +++++ gcc/testsuite/gcc.target/i386/avx512-fma-4.h | 13 +++++ gcc/testsuite/gcc.target/i386/avx512-fma-5.h | 13 +++++ gcc/testsuite/gcc.target/i386/avx512-fma-6.h | 13 +++++ gcc/testsuite/gcc.target/i386/avx512-fma-7.h | 13 +++++ gcc/testsuite/gcc.target/i386/avx512-fma-8.h | 13 +++++ .../gcc.target/i386/avx512f-fmadd-df-zmm-1.c | 12 +++++ .../gcc.target/i386/avx512f-fmadd-sf-zmm-1.c | 12 +++++ .../gcc.target/i386/avx512f-fmadd-sf-zmm-2.c | 12 +++++ .../gcc.target/i386/avx512f-fmadd-sf-zmm-3.c | 12 +++++ .../gcc.target/i386/avx512f-fmadd-sf-zmm-4.c | 12 +++++ .../gcc.target/i386/avx512f-fmadd-sf-zmm-5.c | 12 +++++ .../gcc.target/i386/avx512f-fmadd-sf-zmm-6.c | 12 +++++ .../gcc.target/i386/avx512f-fmadd-sf-zmm-7.c | 11 ++++ .../gcc.target/i386/avx512f-fmadd-sf-zmm-8.c | 12 +++++ .../gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c | 12 +++++ .../gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c | 12 +++++ 20 files changed, 284 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-1.h create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-2.h create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-3.h create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-4.h create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-5.h create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-6.h create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-7.h create mode 100644 gcc/testsuite/gcc.target/i386/avx512-fma-8.h create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmadd-df-zmm-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-3.c create mode 100644 
gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-4.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-5.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-6.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-7.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-8.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 13dc7370fd3..3cad58b19bd 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -654,6 +654,16 @@ (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF]) +(define_mode_iterator VF_AVX512 + [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL") + (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") + V16SF V8DF]) + +(define_mode_attr avx512bcst + [(V4SF "%{1to4%}") (V2DF "%{1to2%}") + (V8SF "%{1to8%}") (V4DF "%{1to4%}") + (V16SF "%{1to16%}") (V8DF "%{1to8%}")]) + ;; Mapping from float mode to required SSE level (define_mode_attr sse [(SF "sse") (DF "sse2") @@ -3740,6 +3750,46 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1" + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") + (fma:VF_AVX512 + (match_operand:VF_AVX512 1 "nonimmediate_operand" "0,v") + (match_operand:VF_AVX512 2 "nonimmediate_operand" "v,0") + (vec_duplicate:VF_AVX512 + (match_operand:<ssescalarmode> 3 "nonimmediate_operand" "m,m"))))] + "TARGET_AVX512F && <sd_mask_mode512bit_condition>" + "vfmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2" + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") + 
(fma:VF_AVX512 + (vec_duplicate:VF_AVX512 + (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m")) + (match_operand:VF_AVX512 2 "nonimmediate_operand" "0,v") + (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0")))] + "TARGET_AVX512F && <sd_mask_mode512bit_condition>" + "@ + vfmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>} + vfmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3" + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") + (fma:VF_AVX512 + (match_operand:VF_AVX512 1 "nonimmediate_operand" "0,v") + (vec_duplicate:VF_AVX512 + (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "m,m")) + (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0")))] + "TARGET_AVX512F && <sd_mask_mode512bit_condition>" + "@ + vfmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>} + vfmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "<avx512>_fmadd_<mode>_mask<round_name>" [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") (vec_merge:VF_AVX512VL diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-1.h b/gcc/testsuite/gcc.target/i386/avx512-fma-1.h new file mode 100644 index 00000000000..a8dc0b5d7a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512-fma-1.h @@ -0,0 +1,12 @@ +#include <immintrin.h> + +#define PASTER2(x,y) x##y +#define PASTER3(x,y,z) _mm##x##_##y##_##z +#define OP(vec, op, suffix) PASTER3 (vec, op, suffix) +#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val) + +type +foo (type x, type y, SCALAR *f) +{ + return OP (vec, op, suffix) (x, y, DUP (vec, suffix, *f)); +} diff --git 
a/gcc/testsuite/gcc.target/i386/avx512-fma-2.h b/gcc/testsuite/gcc.target/i386/avx512-fma-2.h new file mode 100644 index 00000000000..a3a0e9c0dd3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512-fma-2.h @@ -0,0 +1,13 @@ +#include <immintrin.h> + +#define PASTER2(x,y) x##y +#define PASTER3(x,y,z) _mm##x##_##y##_##z +#define TYPE(vec) PASTER2 (__m, vec) +#define OP(vec, op, suffix) PASTER3 (vec, op, suffix) +#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val) + +type +foo (type x, type y, SCALAR *f) +{ + return OP (vec, op, suffix) (y, x, DUP (vec, suffix, *f)); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-3.h b/gcc/testsuite/gcc.target/i386/avx512-fma-3.h new file mode 100644 index 00000000000..8c38d178a25 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512-fma-3.h @@ -0,0 +1,13 @@ +#include <immintrin.h> + +#define PASTER2(x,y) x##y +#define PASTER3(x,y,z) _mm##x##_##y##_##z +#define TYPE(vec) PASTER2 (__m, vec) +#define OP(vec, op, suffix) PASTER3 (vec, op, suffix) +#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val) + +type +foo (type x, type y, SCALAR *f) +{ + return OP (vec, op, suffix) (x, DUP (vec, suffix, *f), y); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-4.h b/gcc/testsuite/gcc.target/i386/avx512-fma-4.h new file mode 100644 index 00000000000..fec01c292a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512-fma-4.h @@ -0,0 +1,13 @@ +#include <immintrin.h> + +#define PASTER2(x,y) x##y +#define PASTER3(x,y,z) _mm##x##_##y##_##z +#define TYPE(vec) PASTER2 (__m, vec) +#define OP(vec, op, suffix) PASTER3 (vec, op, suffix) +#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val) + +type +foo (type x, type y, SCALAR *f) +{ + return OP (vec, op, suffix) (y, DUP (vec, suffix, *f), x); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-5.h b/gcc/testsuite/gcc.target/i386/avx512-fma-5.h new file mode 100644 index 00000000000..386960ae5e0 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx512-fma-5.h @@ -0,0 +1,13 @@ +#include <immintrin.h> + +#define PASTER2(x,y) x##y +#define PASTER3(x,y,z) _mm##x##_##y##_##z +#define TYPE(vec) PASTER2 (__m, vec) +#define OP(vec, op, suffix) PASTER3 (vec, op, suffix) +#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val) + +type +foo (type x, type y, SCALAR *f) +{ + return OP (vec, op, suffix) (DUP (vec, suffix, *f), x, y); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-6.h b/gcc/testsuite/gcc.target/i386/avx512-fma-6.h new file mode 100644 index 00000000000..de209016ee7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512-fma-6.h @@ -0,0 +1,13 @@ +#include <immintrin.h> + +#define PASTER2(x,y) x##y +#define PASTER3(x,y,z) _mm##x##_##y##_##z +#define TYPE(vec) PASTER2 (__m, vec) +#define OP(vec, op, suffix) PASTER3 (vec, op, suffix) +#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val) + +type +foo (type x, type y, SCALAR *f) +{ + return OP (vec, op, suffix) (DUP (vec, suffix, *f), y, x); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-7.h b/gcc/testsuite/gcc.target/i386/avx512-fma-7.h new file mode 100644 index 00000000000..95dc6fb6218 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512-fma-7.h @@ -0,0 +1,13 @@ +#include <immintrin.h> + +#define PASTER2(x,y) x##y +#define PASTER3(x,y,z) _mm##x##_##y##_##z +#define TYPE(vec) PASTER2 (__m, vec) +#define OP(vec, op, suffix) PASTER3 (vec, op, suffix) +#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val) + +type +foo (type x, type y) +{ + return OP (vec, op, suffix) (x, y, DUP (vec, suffix, 2.f)); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512-fma-8.h b/gcc/testsuite/gcc.target/i386/avx512-fma-8.h new file mode 100644 index 00000000000..e5bd28e5471 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512-fma-8.h @@ -0,0 +1,13 @@ +#include <immintrin.h> + +#define PASTER2(x,y) x##y +#define PASTER3(x,y,z) _mm##x##_##y##_##z +#define TYPE(vec) PASTER2 (__m, vec) +#define 
OP(vec, op, suffix) PASTER3 (vec, op, suffix) +#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val) + +type +foo (type x, type y, SCALAR f) +{ + return OP (vec, op, suffix) (y, x, DUP (vec, suffix, f)); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-df-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-df-zmm-1.c new file mode 100644 index 00000000000..6d6e522d521 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-df-zmm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512d +#define vec 512 +#define op fmadd +#define suffix pd +#define SCALAR double + +#include "avx512-fma-1.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-1.c new file mode 100644 index 00000000000..98333103d1d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-1.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-2.c new file mode 100644 index 00000000000..34e5620102b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, 
%zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-2.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-3.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-3.c new file mode 100644 index 00000000000..83456259f6e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-3.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-3.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-4.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-4.c new file mode 100644 index 00000000000..c028c8e3860 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-4.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-4.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-5.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-5.c new file mode 100644 index 00000000000..3eac8e886b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-5.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...ps\[ 
\\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-5.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-6.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-6.c new file mode 100644 index 00000000000..990cdc64f1b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-6.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-6.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-7.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-7.c new file mode 100644 index 00000000000..0c23058825b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-7.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\r\]+\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fmadd +#define suffix ps + +#include "avx512-fma-7.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-8.c b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-8.c new file mode 100644 index 00000000000..8a9da75dd07 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fmadd-sf-zmm-8.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+%zmm\[0-9\]+, %zmm\[0-9\]+, %zmm0" 1 } } */ + +#define type __m512 +#define vec 512 +#define op fmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-8.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c new file mode 100644 index 00000000000..e03a93552fc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-xmm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mfma -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %xmm\[0-9\]+, %xmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%xmm\[0-9\]+" } } */ + +#define type __m128 +#define vec +#define op fmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-1.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c new file mode 100644 index 00000000000..225766d722a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-fmadd-sf-ymm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mfma -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %ymm\[0-9\]+, %ymm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%ymm\[0-9\]+" } } */ + +#define type __m256 +#define vec 256 +#define op fmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-1.h" -- 2.17.2