From: Levy Hsu <ad...@levyhsu.com>

gcc/ChangeLog:

	* config/i386/sse.md: Add V8BF/V16BF/V32BF to mode iterator FMAMODEM.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx10_2-512-bf-vector-fma-1.c: New test.
	* gcc.target/i386/avx10_2-bf-vector-fma-1.c: New test.
---
 gcc/config/i386/sse.md                        |  5 +-
 .../i386/avx10_2-512-bf-vector-fma-1.c        | 34 ++++++++++
 .../gcc.target/i386/avx10_2-bf-vector-fma-1.c | 63 +++++++++++++++++++
 3 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-fma-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-fma-1.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ebca462bae8..85fbef331ea 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5677,7 +5677,10 @@
    (HF "TARGET_AVX512FP16")
    (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
    (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
-   (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")])
+   (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+   (V8BF "TARGET_AVX10_2_256")
+   (V16BF "TARGET_AVX10_2_256")
+   (V32BF "TARGET_AVX10_2_512")])
 
 (define_expand "fma<mode>4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-fma-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-fma-1.c
new file mode 100644
index 00000000000..a857f9b90db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-fma-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2-512 -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+typedef __bf16 v32bf __attribute__ ((__vector_size__ (64)));
+
+v32bf
+foo_madd (v32bf a, v32bf b, v32bf c)
+{
+  return a * b + c;
+}
+
+v32bf
+foo_msub (v32bf a, v32bf b, v32bf c)
+{
+  return a * b - c;
+}
+
+v32bf
+foo_nmadd (v32bf a, v32bf b, v32bf c)
+{
+  return -a * b + c;
+}
+
+v32bf
+foo_nmsub (v32bf a, v32bf b, v32bf c)
+{
+  return -a * b - c;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-fma-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-fma-1.c
new file mode 100644
index 00000000000..0fd78efe049
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-fma-1.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2 -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+typedef __bf16 v16bf __attribute__ ((__vector_size__ (32)));
+typedef __bf16 v8bf __attribute__ ((__vector_size__ (16)));
+
+v16bf
+foo_madd_256 (v16bf a, v16bf b, v16bf c)
+{
+  return a * b + c;
+}
+
+v16bf
+foo_msub_256 (v16bf a, v16bf b, v16bf c)
+{
+  return a * b - c;
+}
+
+v16bf
+foo_nmadd_256 (v16bf a, v16bf b, v16bf c)
+{
+  return -a * b + c;
+}
+
+v16bf
+foo_nmsub_256 (v16bf a, v16bf b, v16bf c)
+{
+  return -a * b - c;
+}
+
+v8bf
+foo_madd_128 (v8bf a, v8bf b, v8bf c)
+{
+  return a * b + c;
+}
+
+v8bf
+foo_msub_128 (v8bf a, v8bf b, v8bf c)
+{
+  return a * b - c;
+}
+
+v8bf
+foo_nmadd_128 (v8bf a, v8bf b, v8bf c)
+{
+  return -a * b + c;
+}
+
+v8bf
+foo_nmsub_128 (v8bf a, v8bf b, v8bf c)
+{
+  return -a * b - c;
+}
-- 
2.31.1