It was noticed that `.VEC_SHL_INSERT ({ 0, ... }, 0)` was not being
simplified to just `{ 0, ... }`. This was generated by the autovectorizer
(maybe even by accident, see PR tree-optimization/116081).

This adds a few SVE testcases to check that this is optimized, since the
auto-vectorizer and the intrinsics are the only two ways of getting this
produced.

Built and tested for aarch64-linux-gnu with no regressions.

        PR target/116075

gcc/ChangeLog:

        * match.pd (`VEC_SHL_INSERT (dup (A), A)`): New pattern.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/dup-insr-1.c: New test.
        * gcc.target/aarch64/sve/dup-insr-2.c: New test.

Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
---
 gcc/match.pd                                  | 17 ++++++++++++
 .../gcc.target/aarch64/sve/dup-insr-1.c       | 26 +++++++++++++++++++
 .../gcc.target/aarch64/sve/dup-insr-2.c       | 26 +++++++++++++++++++
 3 files changed, 69 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 680dfea523f..a3a64bd742e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -10657,3 +10657,20 @@ and,
       }
       (if (full_perm_p)
        (vec_perm (op@3 @0 @1) @3 @2))))))
+
+/* vec shift left insert (dup(A), A) -> dup(A) */
+(simplify
+ (IFN_VEC_SHL_INSERT vec_same_elem_p@0 @1)
+  (with {
+    tree elem = uniform_vector_p (@0);
+    if (!elem && TREE_CODE (@0) == SSA_NAME)
+      {
+        gimple *def = SSA_NAME_DEF_STMT (@0);
+       if (gimple_assign_rhs_code (def) == CONSTRUCTOR)
+         elem = uniform_vector_p (gimple_assign_rhs1 (def));
+       else if (gimple_assign_rhs_code (def) == VEC_DUPLICATE_EXPR)
+         elem = gimple_assign_rhs1 (def);
+      }
+   }
+    (if (elem && operand_equal_p (@1, elem))
+     @0)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
new file mode 100644
index 00000000000..41dcbba45cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+/* PR target/116075 */
+
+#include <arm_sve.h>
+
+svint8_t f(void)
+{
+  svint8_t tt;
+  tt = svdup_s8 (0);
+  tt = svinsr (tt, 0);
+  return tt;
+}
+
+svint8_t f1(int8_t t)
+{
+  svint8_t tt;
+  tt = svdup_s8 (t);
+  tt = svinsr (tt, t);
+  return tt;
+}
+
+/* The above 2 functions should have removed the VEC_SHL_INSERT. */
+
+/* { dg-final { scan-tree-dump-not ".VEC_SHL_INSERT " "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
new file mode 100644
index 00000000000..8eafe974624
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+/* PR target/116075 */
+
+#include <arm_sve.h>
+
+svint8_t f(int8_t t)
+{
+  svint8_t tt;
+  tt = svdup_s8 (0);
+  tt = svinsr (tt, t);
+  return tt;
+}
+
+svint8_t f1(int8_t t)
+{
+  svint8_t tt;
+  tt = svdup_s8 (t);
+  tt = svinsr (tt, 0);
+  return tt;
+}
+
+/* The above 2 functions should not have removed the VEC_SHL_INSERT. */
+
+/* { dg-final { scan-tree-dump-times ".VEC_SHL_INSERT " 2 "optimized" } } */
+
-- 
2.43.0

Reply via email to