> > > > and I have no easy way to test things there. Handling AVX512 should be easy as followup though.
Here's the patch adding avx512f tests for FMADDSUB/FMSUBADD SLP patterns. Pushed to the trunk. -- BR, Hongtao
From 2dc666974cca3a62686f4d7135ca36c25d61a802 Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Wed, 7 Jul 2021 15:19:42 +0800 Subject: [PATCH] [i386] Add avx512 tests for MADDSUB and FMSUBADD SLP vectorization patterns. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512f-vect-fmaddsubXXXpd.c: New test. * gcc.target/i386/avx512f-vect-fmaddsubXXXps.c: New test. * gcc.target/i386/avx512f-vect-fmsubaddXXXpd.c: New test. * gcc.target/i386/avx512f-vect-fmsubaddXXXps.c: New test. --- .../i386/avx512f-vect-fmaddsubXXXpd.c | 41 +++++++++++++++ .../i386/avx512f-vect-fmaddsubXXXps.c | 50 +++++++++++++++++++ .../i386/avx512f-vect-fmsubaddXXXpd.c | 41 +++++++++++++++ .../i386/avx512f-vect-fmsubaddXXXps.c | 50 +++++++++++++++++++ 4 files changed, 182 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXpd.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXps.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-fmsubaddXXXpd.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-fmsubaddXXXps.c diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXpd.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXpd.c new file mode 100644 index 00000000000..734f9e01443 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXpd.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O3 -mfma -save-temps -mavx512f -mprefer-vector-width=512" } */ + +#include "fma-check.h" +void __attribute__((noipa)) +check_fmaddsub (double * __restrict a, double *b, double *c, int n) +{ + for (int i = 0; i < n; ++i) + { + a[8*i + 0] = b[8*i + 0] * c[8*i + 0] - a[8*i + 0]; + a[8*i + 1] = b[8*i + 1] * c[8*i + 1] + a[8*i + 1]; + a[8*i + 2] = b[8*i + 2] * c[8*i + 2] - a[8*i + 2]; + a[8*i + 3] = b[8*i + 3] * c[8*i + 3] + a[8*i + 3]; + a[8*i + 4] = b[8*i + 4] * c[8*i + 4] - a[8*i + 4]; + a[8*i + 5] = b[8*i + 5] * c[8*i + 5] + a[8*i + 
5]; + a[8*i + 6] = b[8*i + 6] * c[8*i + 6] - a[8*i + 6]; + a[8*i + 7] = b[8*i + 7] * c[8*i + 7] + a[8*i + 7]; + } +} + +static void +fma_test (void) +{ + if (!__builtin_cpu_supports ("avx512f")) + return; + double a[8], b[8], c[8]; + for (int i = 0; i < 8; ++i) + { + a[i] = i; + b[i] = 3*i; + c[i] = 7*i; + } + check_fmaddsub (a, b, c, 1); + const double d[8] = { 0., 22., 82., 192., 332., 530., 750., 1036.}; + for (int i = 0; i < 8; ++i) + if (a[i] != d[i]) + __builtin_abort (); +} + +/* { dg-final { scan-assembler {(?n)fmaddsub...pd[ \t].*%zmm[0-9]} } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXps.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXps.c new file mode 100644 index 00000000000..ae196c5ef48 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXps.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -save-temps" } */ + +#include "fma-check.h" +void __attribute__((noipa)) +check_fmaddsub (float * __restrict a, float *b, float *c, int n) +{ + for (int i = 0; i < n; ++i) + { + a[16*i + 0] = b[16*i + 0] * c[16*i + 0] - a[16*i + 0]; + a[16*i + 1] = b[16*i + 1] * c[16*i + 1] + a[16*i + 1]; + a[16*i + 2] = b[16*i + 2] * c[16*i + 2] - a[16*i + 2]; + a[16*i + 3] = b[16*i + 3] * c[16*i + 3] + a[16*i + 3]; + a[16*i + 4] = b[16*i + 4] * c[16*i + 4] - a[16*i + 4]; + a[16*i + 5] = b[16*i + 5] * c[16*i + 5] + a[16*i + 5]; + a[16*i + 6] = b[16*i + 6] * c[16*i + 6] - a[16*i + 6]; + a[16*i + 7] = b[16*i + 7] * c[16*i + 7] + a[16*i + 7]; + a[16*i + 8] = b[16*i + 8] * c[16*i + 8] - a[16*i + 8]; + a[16*i + 9] = b[16*i + 9] * c[16*i + 9] + a[16*i + 9]; + a[16*i + 10] = b[16*i + 10] * c[16*i + 10] - a[16*i + 10]; + a[16*i + 11] = b[16*i + 11] * c[16*i + 11] + a[16*i + 11]; + a[16*i + 12] = b[16*i + 12] * c[16*i + 12] - a[16*i + 12]; + a[16*i + 13] = b[16*i + 13] * c[16*i + 13] + a[16*i + 13]; + a[16*i + 14] = b[16*i + 14] * c[16*i 
+ 14] - a[16*i + 14]; + a[16*i + 15] = b[16*i + 15] * c[16*i + 15] + a[16*i + 15]; + } +} + +static void +fma_test (void) +{ + if (!__builtin_cpu_supports ("avx512f")) + return; + float a[16], b[16], c[16]; + for (int i = 0; i < 16; ++i) + { + a[i] = i; + b[i] = 3*i; + c[i] = 7*i; + } + check_fmaddsub (a, b, c, 1); + const float d[16] = { 0., 22., 82., 192., 332., 530., 750., 1036., + 1336, 1710., 2090., 2552., 3012., 3562., 4102., 4740.}; + for (int i = 0; i < 16; ++i) + if (a[i] != d[i]) + __builtin_abort (); +} + +/* { dg-final { scan-assembler {(?n)fmaddsub...ps[ \t].*%zmm[0-9]} } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-fmsubaddXXXpd.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmsubaddXXXpd.c new file mode 100644 index 00000000000..cde76db1755 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmsubaddXXXpd.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -save-temps" } */ + +#include "fma-check.h" +void __attribute__((noipa)) +check_fmaddsub (double * __restrict a, double *b, double *c, int n) +{ + for (int i = 0; i < n; ++i) + { + a[8*i + 0] = b[8*i + 0] * c[8*i + 0] + a[8*i + 0]; + a[8*i + 1] = b[8*i + 1] * c[8*i + 1] - a[8*i + 1]; + a[8*i + 2] = b[8*i + 2] * c[8*i + 2] + a[8*i + 2]; + a[8*i + 3] = b[8*i + 3] * c[8*i + 3] - a[8*i + 3]; + a[8*i + 4] = b[8*i + 4] * c[8*i + 4] + a[8*i + 4]; + a[8*i + 5] = b[8*i + 5] * c[8*i + 5] - a[8*i + 5]; + a[8*i + 6] = b[8*i + 6] * c[8*i + 6] + a[8*i + 6]; + a[8*i + 7] = b[8*i + 7] * c[8*i + 7] - a[8*i + 7]; + } +} + +static void +fma_test (void) +{ + if (!__builtin_cpu_supports ("avx512f")) + return; + double a[8], b[8], c[8]; + for (int i = 0; i < 8; ++i) + { + a[i] = i; + b[i] = 3*i; + c[i] = 7*i; + } + check_fmaddsub (a, b, c, 1); + const double d[8] = { 0., 20., 86., 186., 340., 520., 762., 1022.}; + for (int i = 0; i < 8; ++i) + if (a[i] != d[i]) + __builtin_abort (); +} + +/* { dg-final 
{ scan-assembler {(?n)fmsubadd...pd[ \t].*%zmm[0-9]} } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-fmsubaddXXXps.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmsubaddXXXps.c new file mode 100644 index 00000000000..59de39f4112 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmsubaddXXXps.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -save-temps" } */ + +#include "fma-check.h" +void __attribute__((noipa)) +check_fmaddsub (float * __restrict a, float *b, float *c, int n) +{ + for (int i = 0; i < n; ++i) + { + a[16*i + 0] = b[16*i + 0] * c[16*i + 0] + a[16*i + 0]; + a[16*i + 1] = b[16*i + 1] * c[16*i + 1] - a[16*i + 1]; + a[16*i + 2] = b[16*i + 2] * c[16*i + 2] + a[16*i + 2]; + a[16*i + 3] = b[16*i + 3] * c[16*i + 3] - a[16*i + 3]; + a[16*i + 4] = b[16*i + 4] * c[16*i + 4] + a[16*i + 4]; + a[16*i + 5] = b[16*i + 5] * c[16*i + 5] - a[16*i + 5]; + a[16*i + 6] = b[16*i + 6] * c[16*i + 6] + a[16*i + 6]; + a[16*i + 7] = b[16*i + 7] * c[16*i + 7] - a[16*i + 7]; + a[16*i + 8] = b[16*i + 8] * c[16*i + 8] + a[16*i + 8]; + a[16*i + 9] = b[16*i + 9] * c[16*i + 9] - a[16*i + 9]; + a[16*i + 10] = b[16*i + 10] * c[16*i + 10] + a[16*i + 10]; + a[16*i + 11] = b[16*i + 11] * c[16*i + 11] - a[16*i + 11]; + a[16*i + 12] = b[16*i + 12] * c[16*i + 12] + a[16*i + 12]; + a[16*i + 13] = b[16*i + 13] * c[16*i + 13] - a[16*i + 13]; + a[16*i + 14] = b[16*i + 14] * c[16*i + 14] + a[16*i + 14]; + a[16*i + 15] = b[16*i + 15] * c[16*i + 15] - a[16*i + 15]; + } +} + +static void +fma_test (void) +{ + if (!__builtin_cpu_supports ("avx512f")) + return; + float a[16], b[16], c[16]; + for (int i = 0; i < 16; ++i) + { + a[i] = i; + b[i] = 3*i; + c[i] = 7*i; + } + check_fmaddsub (a, b, c, 1); + const float d[16] = { 0., 20., 86., 186., 340., 520., 762., 1022., + 1352, 1692., 2110., 2530., 3036., 3536., 4130., 4710.}; + for (int i = 0; i < 16; ++i) + if (a[i] != d[i]) + 
__builtin_abort (); +} + +/* { dg-final { scan-assembler {(?n)fmsubadd...ps[ \t].*%zmm[0-9]} } } */ -- 2.18.1