[PATCH 09/62] AVX512FP16: Enable _Float16 autovectorization

liuhongt via Gcc-patches Wed, 30 Jun 2021 23:25:12 -0700

From: "H.J. Lu" <hjl.to...@gmail.com>

gcc/ChangeLog:


        * config/i386/i386-expand.c
        (ix86_avx256_split_vector_move_misalign): Handle V16HF mode.
        * config/i386/i386.c
        (ix86_preferred_simd_mode): Handle HF mode.
        * config/i386/sse.md (V_256H): New mode iterator.
        (avx_vextractf128<mode>): Use it.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/vect-float16-1.c: New test.
        * gcc.target/i386/vect-float16-10.c: Ditto.
        * gcc.target/i386/vect-float16-11.c: Ditto.
        * gcc.target/i386/vect-float16-12.c: Ditto.
        * gcc.target/i386/vect-float16-2.c: Ditto.
        * gcc.target/i386/vect-float16-3.c: Ditto.
        * gcc.target/i386/vect-float16-4.c: Ditto.
        * gcc.target/i386/vect-float16-5.c: Ditto.
        * gcc.target/i386/vect-float16-6.c: Ditto.
        * gcc.target/i386/vect-float16-7.c: Ditto.
        * gcc.target/i386/vect-float16-8.c: Ditto.
        * gcc.target/i386/vect-float16-9.c: Ditto.
---
 gcc/config/i386/i386-expand.c                   |  4 ++++
 gcc/config/i386/i386.c                          | 14 ++++++++++++++
 gcc/config/i386/sse.md                          |  7 ++++++-
 gcc/testsuite/gcc.target/i386/vect-float16-1.c  | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-10.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-11.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-12.c | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-2.c  | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-3.c  | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-4.c  | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-5.c  | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-6.c  | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-7.c  | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-8.c  | 14 ++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-float16-9.c  | 14 ++++++++++++++
 15 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-float16-9.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 39647eb2cf1..df50c72ab16 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -498,6 +498,10 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
       extract = gen_avx_vextractf128v32qi;
       mode = V16QImode;
       break;
+    case E_V16HFmode:
+      extract = gen_avx_vextractf128v16hf;
+      mode = V8HFmode;
+      break;
     case E_V8SFmode:
       extract = gen_avx_vextractf128v8sf;
       mode = V4SFmode;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 79e6880d9dd..dc0d440061b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22360,6 +22360,20 @@ ix86_preferred_simd_mode (scalar_mode mode)
       else
        return V2DImode;
 
+    case E_HFmode:
+      if (TARGET_AVX512FP16)
+       {
+         if (TARGET_AVX512VL)
+           {
+             if (TARGET_PREFER_AVX128)
+               return V8HFmode;
+             else if (TARGET_PREFER_AVX256)
+               return V16HFmode;
+           }
+         return V32HFmode;
+       }
+      return word_mode;
+
     case E_SFmode:
       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V16SFmode;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 2c1b6fbcd86..a0cfd611006 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -276,6 +276,11 @@ (define_mode_iterator V_128
 (define_mode_iterator V_256
   [V32QI V16HI V8SI V4DI V8SF V4DF])
 
+;; All 256bit vector modes including HF vector mode
+(define_mode_iterator V_256H
+  [V32QI V16HI V8SI V4DI V8SF V4DF
+   (V16HF "TARGET_AVX512F && TARGET_AVX512VL")])
+
 ;; All 128bit and 256bit vector modes
 (define_mode_iterator V_128_256
   [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
@@ -9045,7 +9050,7 @@ (define_expand "avx512vl_vextractf128<mode>"
 
 (define_expand "avx_vextractf128<mode>"
   [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
-   (match_operand:V_256 1 "register_operand")
+   (match_operand:V_256H 1 "register_operand")
    (match_operand:SI 2 "const_0_to_1_operand")]
   "TARGET_AVX"
 {
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-1.c b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
new file mode 100644
index 00000000000..0f82cf94932
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-10.c b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
new file mode 100644
index 00000000000..217645692ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-11.c b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
new file mode 100644
index 00000000000..e0409ce9d3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-12.c b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
new file mode 100644
index 00000000000..d92a25dc255
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-2.c b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
new file mode 100644
index 00000000000..974fca4ce09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-3.c b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
new file mode 100644
index 00000000000..9bca9142df7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-4.c b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
new file mode 100644
index 00000000000..e6f26f0aa40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-5.c b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
new file mode 100644
index 00000000000..38f287b1dc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-6.c b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
new file mode 100644
index 00000000000..bc9f7870061
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-7.c b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
new file mode 100644
index 00000000000..b4849cf77c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-8.c b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
new file mode 100644
index 00000000000..71631b17cc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-9.c b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
new file mode 100644
index 00000000000..1be5c7f022f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 16 } } */
-- 
2.18.1

[PATCH 09/62] AVX512FP16: Enable _Float16 autovectorization

Reply via email to