From: Andrew Pinski <apin...@marvell.com>

The function aarch64_evpc_ins would reuse the target even though
it might be the same register as the two inputs.
Instead of checking to see if we can reuse the target, creating
a new register always is better.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

        PR target/101529

gcc/ChangeLog:

        * config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
        as an input instead create a new reg.

gcc/testsuite/ChangeLog:

        * c-c++-common/torture/builtin-convertvector-2.c: New test.
        * c-c++-common/torture/builtin-shufflevector-2.c: New test.
---
 gcc/config/aarch64/aarch64.c                  |  8 ++++--
 .../torture/builtin-convertvector-2.c         | 26 +++++++++++++++++++
 .../torture/builtin-shufflevector-2.c         | 26 +++++++++++++++++++
 3 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
 create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2c00583e12c..e4fc546fae7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
     }
   gcc_assert (extractindex < nelt);
 
-  emit_move_insn (d->target, insv);
+  /* Use a new reg instead of target as one of the
+     operands might be target. */
+  rtx original = gen_reg_rtx (GET_MODE (d->target));
+
+  emit_move_insn (original, insv);
   insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
   expand_operand ops[5];
   create_output_operand (&ops[0], d->target, mode);
-  create_input_operand (&ops[1], d->target, mode);
+  create_input_operand (&ops[1], original, mode);
   create_integer_operand (&ops[2], 1 << idx);
   create_input_operand (&ops[3], extractv, mode);
   create_integer_operand (&ops[4], extractindex);
diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c 
b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
new file mode 100644
index 00000000000..d88f6a72b5c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* PR target/101529 */
+
+typedef unsigned char __attribute__((__vector_size__ (1))) W;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned short __attribute__((__vector_size__ (16))) U;
+
+unsigned short us;
+
+/* aarch64 used to miscompile foo to just return 0. */
+W
+foo (unsigned char uc)
+{
+  V v = __builtin_convertvector ((U){ } >= us, V);
+  return __builtin_shufflevector ((W){ }, v, 4) & uc;
+}
+
+int
+main (void)
+{
+  W x = foo (5);
+  if (x[0] != 5)
+    __builtin_abort();
+  return 0;
+}
+
diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c 
b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
new file mode 100644
index 00000000000..7c4999ed4e9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
@@ -0,0 +1,26 @@
+/* { dg-do run}  */
+/* PR target/101529 */
+typedef unsigned char C;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned char __attribute__((__vector_size__ (32))) U;
+
+C c;
+
+/* aarch64 used to miscompile foo to just return a vector of 0s */
+V
+foo (V v)
+{
+  v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }),
+                               0, 1, 8, 32, 8, 20, 36, 36);
+  return v;
+}
+
+int
+main (void)
+{
+  V v = foo ((V) { });
+  for (unsigned i = 0; i < sizeof (v); i++)
+    if (v[i] != (i >= 2 ? 0xff : 0))
+      __builtin_abort ();
+  return 0;
+}
-- 
2.17.1

Reply via email to