Hi

The patch is to fix vec_init_dup_v16bf, add correct handle for v16bf mode in 
ix86_expand_vector_init_duplicate.
Add testcase with sse2 without avx2.

OK for master? 

gcc/ChangeLog:

        PR target/106887
        * config/i386/i386-expand.cc (ix86_expand_vector_init_duplicate):
        Fixed V16BF mode case.

gcc/testsuite/ChangeLog:

        PR target/106887
        * gcc.target/i386/vect-bfloat16-2c.c: New test.
---
 gcc/config/i386/i386-expand.cc                |  1 +
 .../gcc.target/i386/vect-bfloat16-2c.c        | 76 +++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-bfloat16-2c.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc 
index d7b49c99dc8..9451c561489 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -15111,6 +15111,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, 
machine_mode mode,
        {
          machine_mode hvmode = (mode == V16HImode ? V8HImode
                                 : mode == V16HFmode ? V8HFmode
+                                : mode == V16BFmode ? V8BFmode
                                 : V16QImode);
          rtx x = gen_reg_rtx (hvmode);
 
diff --git a/gcc/testsuite/gcc.target/i386/vect-bfloat16-2c.c 
b/gcc/testsuite/gcc.target/i386/vect-bfloat16-2c.c
new file mode 100644
index 00000000000..bead94e46a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-bfloat16-2c.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-options "-mf16c -msse2 -mno-avx2 -O2" } */
+
+typedef __bf16 v8bf __attribute__ ((__vector_size__ (16))); typedef 
+__bf16 v16bf __attribute__ ((__vector_size__ (32)));
+
+#define VEC_EXTRACT(V,S,IDX)                   \
+  S                                            \
+  __attribute__((noipa))                       \
+  vec_extract_##V##_##IDX (V v)                        \
+  {                                            \
+    return v[IDX];                             \
+  }
+
+#define VEC_SET(V,S,IDX)                       \
+  V                                            \
+  __attribute__((noipa))                       \
+  vec_set_##V##_##IDX (V v, S s)               \
+  {                                            \
+    v[IDX] = s;                                \
+    return v;                                  \
+  }
+
+v8bf
+vec_init_v8bf (__bf16 a1, __bf16 a2, __bf16 a3, __bf16 a4,
+              __bf16 a5,  __bf16 a6, __bf16 a7, __bf16 a8) {
+    return __extension__ (v8bf) {a1, a2, a3, a4, a5, a6, a7, a8}; }
+
+v16bf
+vec_init_v16bf (__bf16 a1, __bf16 a2, __bf16 a3, __bf16 a4,
+              __bf16 a5,  __bf16 a6, __bf16 a7, __bf16 a8,
+              __bf16 a9,  __bf16 a10, __bf16 a11, __bf16 a12,
+              __bf16 a13,  __bf16 a14, __bf16 a15, __bf16 a16) {
+    return __extension__ (v16bf) {a1, a2, a3, a4, a5, a6, a7, a8,
+                                 a9, a10, a11, a12, a13, a14, a15, a16}; }
+
+v8bf
+vec_init_dup_v8bf (__bf16 a1)
+{
+    return __extension__ (v8bf) {a1, a1, a1, a1, a1, a1, a1, a1}; }
+
+v16bf
+vec_init_dup_v16bf (__bf16 a1)
+{
+    return __extension__ (v16bf) {a1, a1, a1, a1, a1, a1, a1, a1,
+                                 a1, a1, a1, a1, a1, a1, a1, a1};
+}
+
+/* { dg-final { scan-assembler-times "vpunpcklwd" 12 } } */
+/* { dg-final { scan-assembler-times "vpunpckldq" 6 } } */
+/* { dg-final { scan-assembler-times "vpunpcklqdq" 3 } } */
+
+VEC_EXTRACT (v8bf, __bf16, 0);
+VEC_EXTRACT (v8bf, __bf16, 4);
+VEC_EXTRACT (v16bf, __bf16, 0);
+VEC_EXTRACT (v16bf, __bf16, 3);
+VEC_EXTRACT (v16bf, __bf16, 8);
+VEC_EXTRACT (v16bf, __bf16, 15);
+/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$8" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$6" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$14" 1 } } */
+/* { dg-final { scan-assembler-times "vextract" 4 } } */
+
+VEC_SET (v8bf, __bf16, 4);
+VEC_SET (v16bf, __bf16, 3);
+VEC_SET (v16bf, __bf16, 8);
+VEC_SET (v16bf, __bf16, 15);
+/* { dg-final { scan-assembler-times "vpblendw" 3 { target { ! ia32 } } 
+} } */
+
+/* { dg-final { scan-assembler-times "vpinsrw" 30 { target ia32 } } } 
+*/
+
--
2.18.2

Reply via email to