gcc/ChangeLog: * config/i386/sse.md (FMAMODEM): Extend to handle FP16. (VFH_SF_AVX512VL): Extend to handle HFmode. (VF_SF_AVX512VL): Delete.
gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-fma-1.c: New test. * gcc.target/i386/avx512fp16vl-fma-1.c: New test. * gcc.target/i386/avx512fp16vl-fma-vectorize-1.c: New test. --- gcc/config/i386/sse.md | 11 +-- .../gcc.target/i386/avx512fp16-fma-1.c | 69 ++++++++++++++++++ .../gcc.target/i386/avx512fp16vl-fma-1.c | 70 +++++++++++++++++++ .../i386/avx512fp16vl-fma-vectorize-1.c | 45 ++++++++++++ 4 files changed, 190 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f87f6893835..2b8d12086f4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4489,7 +4489,11 @@ (define_mode_iterator FMAMODEM (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V16SF "TARGET_AVX512F") - (V8DF "TARGET_AVX512F")]) + (V8DF "TARGET_AVX512F") + (HF "TARGET_AVX512FP16") + (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V32HF "TARGET_AVX512FP16")]) (define_expand "fma<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -4597,14 +4601,11 @@ (define_insn "*fma_fmadd_<mode>" (set_attr "mode" "<MODE>")]) ;; Suppose AVX-512F as baseline -(define_mode_iterator VF_SF_AVX512VL - [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) - (define_mode_iterator VFH_SF_AVX512VL [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (HF "TARGET_AVX512FP16") SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c 
b/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c new file mode 100644 index 00000000000..d78d7629838 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-fma-1.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx512fp16" } */ + +typedef _Float16 v32hf __attribute__ ((__vector_size__ (64))); + +_Float16 +foo1 (_Float16 a, _Float16 b, _Float16 c) +{ + return a * b + c; +} + +/* { dg-final { scan-assembler-times "vfmadd132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +_Float16 +foo2 (_Float16 a, _Float16 b, _Float16 c) +{ + return -a * b + c; +} + +/* { dg-final { scan-assembler-times "vfnmadd132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +_Float16 +foo3 (_Float16 a, _Float16 b, _Float16 c) +{ + return a * b - c; +} + +/* { dg-final { scan-assembler-times "vfmsub132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +_Float16 +foo4 (_Float16 a, _Float16 b, _Float16 c) +{ + return -a * b - c; +} + +/* { dg-final { scan-assembler-times "vfnmsub132sh\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +v32hf +foo5 (v32hf a, v32hf b, v32hf c) +{ + return a * b + c; +} + +/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */ + +v32hf +foo6 (v32hf a, v32hf b, v32hf c) +{ + return -a * b + c; +} + +/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */ + +v32hf +foo7 (v32hf a, v32hf b, v32hf c) +{ + return a * b - c; +} + +/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */ + +v32hf +foo8 (v32hf a, v32hf b, v32hf c) +{ + return -a * b - c; +} + +/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*zmm\[0-9\]" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c new file mode 100644 index 00000000000..1a832f37d6c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-1.c @@ -0,0 +1,70 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */ + +typedef _Float16 v8hf __attribute__ ((__vector_size__ (16))); 
+typedef _Float16 v16hf __attribute__ ((__vector_size__ (32))); + +v8hf +foo1 (v8hf a, v8hf b, v8hf c) +{ + return a * b + c; +} + +/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +v8hf +foo2 (v8hf a, v8hf b, v8hf c) +{ + return -a * b + c; +} + +/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +v8hf +foo3 (v8hf a, v8hf b, v8hf c) +{ + return a * b - c; +} + +/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +v8hf +foo4 (v8hf a, v8hf b, v8hf c) +{ + return -a * b - c; +} + +/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +v16hf +foo5 (v16hf a, v16hf b, v16hf c) +{ + return a * b + c; +} + +/* { dg-final { scan-assembler-times "vfmadd132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +v16hf +foo6 (v16hf a, v16hf b, v16hf c) +{ + return -a * b + c; +} + +/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +v16hf +foo7 (v16hf a, v16hf b, v16hf c) +{ + return a * b - c; +} + +/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */ + +v16hf +foo8 (v16hf a, v16hf b, v16hf c) +{ + return -a * b - c; +} + +/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*ymm\[0-9\]" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c new file mode 100644 index 00000000000..d0b8bec34f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-fma-vectorize-1.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx512fp16 -mavx512vl" } */ + +typedef _Float16 v8hf __attribute__ ((__vector_size__ (16))); +typedef _Float16 v16hf __attribute__ ((__vector_size__ (32))); + +void +foo1 (_Float16* __restrict pa, _Float16* __restrict pb, + _Float16* __restrict pc, _Float16* __restrict pd) +{ + for (int i = 0; i != 8; i++) + pd[i] = pa[i] * pb[i] + pc[i]; +} + +/* { dg-final { scan-assembler-times 
"vfmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +void +foo2 (_Float16* __restrict pa, _Float16* __restrict pb, + _Float16* __restrict pc, _Float16* __restrict pd) +{ + for (int i = 0; i != 8; i++) + pd[i] = -pa[i] * pb[i] + pc[i]; +} + +/* { dg-final { scan-assembler-times "vfnmadd132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +void +foo3 (_Float16* __restrict pa, _Float16* __restrict pb, + _Float16* __restrict pc, _Float16* __restrict pd) +{ + for (int i = 0; i != 8; i++) + pd[i] = pa[i] * pb[i] - pc[i]; +} + +/* { dg-final { scan-assembler-times "vfmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ + +void +foo4 (_Float16* __restrict pa, _Float16* __restrict pb, + _Float16* __restrict pc, _Float16* __restrict pd) +{ + for (int i = 0; i != 8; i++) + pd[i] = -pa[i] * pb[i] - pc[i]; +} + +/* { dg-final { scan-assembler-times "vfnmsub132ph\[^\n\r\]*xmm\[0-9\]" 1 } } */ -- 2.18.1