It was noticed that `.VEC_SHL_INSERT ({ 0, ... }, 0)` was not being simplified to just `{ 0, ... }`. This was generated by the autovectorizer (maybe even by accident, see PR tree-optimization/116081).
This adds a few SVE testcases to check that this is optimized, since the auto-vectorizer and intrinsics are the only two ways of getting this produced. Built and tested for aarch64-linux-gnu with no regressions. PR target/116075 gcc/ChangeLog: * match.pd (`VEC_SHL_INSERT (dup (A), A)`): New pattern. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/dup-insr-1.c: New test. * gcc.target/aarch64/sve/dup-insr-2.c: New test. Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> --- gcc/match.pd | 17 ++++++++++++ .../gcc.target/aarch64/sve/dup-insr-1.c | 26 +++++++++++++++++++ .../gcc.target/aarch64/sve/dup-insr-2.c | 26 +++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c diff --git a/gcc/match.pd b/gcc/match.pd index 680dfea523f..a3a64bd742e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -10657,3 +10657,20 @@ and, } (if (full_perm_p) (vec_perm (op@3 @0 @1) @3 @2)))))) + +/* vec shift left insert (dup(A), A) -> dup(A) */ +(simplify + (IFN_VEC_SHL_INSERT vec_same_elem_p@0 @1) + (with { + tree elem = uniform_vector_p (@0); + if (!elem && TREE_CODE (@0) == SSA_NAME) + { + gimple *def = SSA_NAME_DEF_STMT (@0); + if (gimple_assign_rhs_code (def) == CONSTRUCTOR) + elem = uniform_vector_p (gimple_assign_rhs1 (def)); + else if (gimple_assign_rhs_code (def) == VEC_DUPLICATE_EXPR) + elem = gimple_assign_rhs1 (def); + } + } + (if (elem && operand_equal_p (@1, elem)) + @0))) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c new file mode 100644 index 00000000000..41dcbba45cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ +/* PR target/116075 */ + +#include <arm_sve.h> + +svint8_t f(void) +{ + svint8_t tt; + tt = svdup_s8 (0); + tt = svinsr (tt, 0); + return tt; +} + 
+svint8_t f1(int8_t t) +{ + svint8_t tt; + tt = svdup_s8 (t); + tt = svinsr (tt, t); + return tt; +} + +/* The above 2 functions should have removed the VEC_SHL_INSERT. */ + +/* { dg-final { scan-tree-dump-not ".VEC_SHL_INSERT " "optimized" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c new file mode 100644 index 00000000000..8eafe974624 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ +/* PR target/116075 */ + +#include <arm_sve.h> + +svint8_t f(int8_t t) +{ + svint8_t tt; + tt = svdup_s8 (0); + tt = svinsr (tt, t); + return tt; +} + +svint8_t f1(int8_t t) +{ + svint8_t tt; + tt = svdup_s8 (t); + tt = svinsr (tt, 0); + return tt; +} + +/* The above 2 functions should not have removed the VEC_SHL_INSERT. */ + +/* { dg-final { scan-tree-dump-times ".VEC_SHL_INSERT " 2 "optimized" } } */ + -- 2.43.0