Covering all cases in vectorizable_shift is hard - this patch makes sure
the case of PR95356 is handled appropriately without breaking the others.

Bootstrapped / tested on x86_64-unknown-linux-gnu, applied.

2020-05-28  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/95273
        PR tree-optimization/95356
        * tree-vect-stmts.c (vectorizable_shift): Adjust when and to
        what we set the vector type of the shift operand SLP node
        again.

        * gcc.target/i386/pr95356.c: New testcase.
---
 gcc/testsuite/gcc.target/i386/pr95356.c | 125 ++++++++++++++++++++++++++++++++
 gcc/tree-vect-stmts.c                   |   6 +-
 2 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95356.c

diff --git a/gcc/testsuite/gcc.target/i386/pr95356.c b/gcc/testsuite/gcc.target/i386/pr95356.c
new file mode 100644
index 00000000000..fdd917ba5e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95356.c
@@ -0,0 +1,125 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512dq" } */
+
+extern void abort (void);
+long long a[16];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 3;
+  a2 = a2 << 4;
+  a3 = a3 << 5;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 2;
+  a2 = a2 << 2;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2a (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << x;
+  a1 = a1 << 2;
+  a2 = a2 << 2;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2b (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 2;
+  a2 = a2 << x;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << x;
+  a1 = a1 << x;
+  a2 = a2 << x;
+  a3 = a3 << x;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+int
+main ()
+{
+  a[0] = 4LL;
+  a[1] = 3LL;
+  a[2] = 2LL;
+  a[3] = 1LL;
+  f1 ();
+  if (a[0] != (4LL << 2) || a[1] != (3LL << 3)
+      || a[2] != (2LL << 4) || a[3] != (1LL << 5))
+    abort ();
+  f2 ();
+  if (a[0] != (4LL << 4) || a[1] != (3LL << 5)
+      || a[2] != (2LL << 6) || a[3] != (1LL << 7))
+    abort ();
+  f3 (3);
+  if (a[0] != (4LL << 7) || a[1] != (3LL << 8)
+      || a[2] != (2LL << 9) || a[3] != (1LL << 10))
+    abort ();
+  f2a (3);
+  if (a[0] != (4LL << 10) || a[1] != (3LL << 10)
+      || a[2] != (2LL << 11) || a[3] != (1LL << 12))
+    abort ();
+  f2b (3);
+  if (a[0] != (4LL << 12) || a[1] != (3LL << 12)
+      || a[2] != (2LL << 14) || a[3] != (1LL << 14))
+    abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2f92bb5555e..ff335aa531e 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5792,7 +5792,11 @@ vectorizable_shift (vec_info *vinfo,
       if (slp_node
          && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
              || (!scalar_shift_arg
-                 && !vect_maybe_update_slp_op_vectype (slp_op1, vectype))))
+                 && (!incompatible_op1_vectype_p
+                     || dt[1] == vect_constant_def)
+                 && !vect_maybe_update_slp_op_vectype
+                       (slp_op1,
+                        incompatible_op1_vectype_p ? vectype : op1_vectype))))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-- 
2.16.4

Reply via email to