From: Andrew Pinski <apin...@marvell.com> The function aarch64_evpc_ins would reuse the target even though it might be the same register as one of the two inputs. Instead of checking whether the target can be reused, it is better to always create a new register.
OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. PR target/101529 gcc/ChangeLog: * config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target as an input; instead create a new reg. gcc/testsuite/ChangeLog: * c-c++-common/torture/builtin-convertvector-2.c: New test. * c-c++-common/torture/builtin-shufflevector-2.c: New test. --- gcc/config/aarch64/aarch64.c | 8 ++++-- .../torture/builtin-convertvector-2.c | 26 +++++++++++++++++++ .../torture/builtin-shufflevector-2.c | 26 +++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 2c00583e12c..e4fc546fae7 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d) } gcc_assert (extractindex < nelt); - emit_move_insn (d->target, insv); + /* Use a new reg instead of target as one of the + operands might be target. 
*/ + rtx original = gen_reg_rtx (GET_MODE (d->target)); + + emit_move_insn (original, insv); insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode); expand_operand ops[5]; create_output_operand (&ops[0], d->target, mode); - create_input_operand (&ops[1], d->target, mode); + create_input_operand (&ops[1], original, mode); create_integer_operand (&ops[2], 1 << idx); create_input_operand (&ops[3], extractv, mode); create_integer_operand (&ops[4], extractindex); diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c new file mode 100644 index 00000000000..d88f6a72b5c --- /dev/null +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* PR target/101529 */ + +typedef unsigned char __attribute__((__vector_size__ (1))) W; +typedef unsigned char __attribute__((__vector_size__ (8))) V; +typedef unsigned short __attribute__((__vector_size__ (16))) U; + +unsigned short us; + +/* aarch64 used to miscompile foo to just return 0. 
*/ +W +foo (unsigned char uc) +{ + V v = __builtin_convertvector ((U){ } >= us, V); + return __builtin_shufflevector ((W){ }, v, 4) & uc; +} + +int +main (void) +{ + W x = foo (5); + if (x[0] != 5) + __builtin_abort(); + return 0; +} + diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c new file mode 100644 index 00000000000..7c4999ed4e9 --- /dev/null +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* PR target/101529 */ +typedef unsigned char C; +typedef unsigned char __attribute__((__vector_size__ (8))) V; +typedef unsigned char __attribute__((__vector_size__ (32))) U; + +C c; + +/* aarch64 used to miscompile foo to just return a vector of 0s */ +V +foo (V v) +{ + v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }), + 0, 1, 8, 32, 8, 20, 36, 36); + return v; +} + +int +main (void) +{ + V v = foo ((V) { }); + for (unsigned i = 0; i < sizeof (v); i++) + if (v[i] != (i >= 2 ? 0xff : 0)) + __builtin_abort (); + return 0; +} -- 2.17.1