[PATCH] i386: Support complex fma/conj_fma for _Float16.

Kong, Lingling via Gcc-patches Fri, 05 Nov 2021 00:09:49 -0700

Hi,

This patch adds support for cmla_optab, cmul_optab, cmla_conj_optab and
cmul_conj_optab for vector _Float16.
Ok for master?


gcc/ChangeLog:

        * config/i386/sse.md (cmul<conj_op><mode>3): Add new define_expand.
        (cmla<conj_op><mode>4): Likewise.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/avx512fp16-vector-complex-float.c: New test.
---
 gcc/config/i386/sse.md                        | 23 +++++++++++
 .../i386/avx512fp16-vector-complex-float.c    | 40 +++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0a7f5b178f9..8d3fef0a31a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5922,6 +5922,12 @@
         (UNSPEC_COMPLEX_FMUL "fmulc")
         (UNSPEC_COMPLEX_FCMUL "fcmulc")])
 
+(define_int_attr conj_op
+       [(UNSPEC_COMPLEX_FMA "")
+        (UNSPEC_COMPLEX_FCMA "_conj")
+        (UNSPEC_COMPLEX_FMUL "")
+        (UNSPEC_COMPLEX_FCMUL "_conj")])
+
 (define_mode_attr complexmove
   [(V32HF "avx512f_loadv16sf")
    (V16HF "avx512vl_loadv8sf")
@@ -6003,6 +6009,15 @@
   DONE;
 })
 
+(define_expand "cmla<conj_op><mode>4"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+       (unspec:VF_AVX512FP16VL
+           [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+            (match_operand:VF_AVX512FP16VL 2 "vector_operand")
+            (match_operand:VF_AVX512FP16VL 3 "vector_operand")]
+            UNSPEC_COMPLEX_F_C_MA))]
+  "TARGET_AVX512FP16")
+
 (define_insn "fma_<complexopname>_<mode><sdc_maskz_name><round_name>"
   [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
        (unspec:VF_AVX512FP16VL
@@ -6084,6 +6099,14 @@
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
+(define_expand "cmul<conj_op><mode>3"
+  [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+       (unspec:VF_AVX512FP16VL
+         [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+          (match_operand:VF_AVX512FP16VL 2 "vector_operand")]
+          UNSPEC_COMPLEX_F_C_MUL))]
+  "TARGET_AVX512FP16")
+
 (define_insn "<avx512>_<complexopname>_<mode><maskc_name><round_name>"
   [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
          (unspec:VF_AVX512FP16VL
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c
new file mode 100644
index 00000000000..bcb957f0de0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vfmaddcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfmadd\[123]*ph\[ \\t\]"} } */
+/* { dg-final { scan-assembler-not "vfmadd\[123]*sh\[ \\t\]"} } */
+/* { dg-final { scan-assembler-times "vfcmaddcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmulcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfcmulcph\[ \\t\]" 1 } } */
+
+#include<complex.h>
+#define TYPE _Float16
+#define N 16
+
+void fma0 (_Complex TYPE *a, _Complex TYPE *b,
+           _Complex TYPE *c)
+{
+  for (int i = 0; i < N; i++)
+    c[i] += a[i] * b[i];
+}
+
+void fmaconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+             _Complex TYPE c[restrict N])
+{
+  for (int i = 0; i < N; i++)
+    c[i] += a[i] * ~b[i];
+}
+
+void fmul (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+          _Complex TYPE c[restrict N])
+{
+  for (int i = 0; i < N; i++)
+    c[i] = a[i] * b[i];
+}
+
+void fmulconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+              _Complex TYPE c[restrict N])
+{
+  for (int i = 0; i < N; i++)
+    c[i] = a[i] * ~b[i];
+}
--
2.18.1

[PATCH] i386: Support complex fma/conj_fma for _Float16.

Reply via email to