Hi,
For the following test:

/* Reverse V twice; the two reversals cancel, so the result equals V.  */
svint32_t f(svint32_t v)
{
  return svrev_s32 (svrev_s32 (v));
}

We currently generate two rev instructions instead of treating the double reversal as a no-op:
f:
        rev     z0.s, z0.s
        rev     z0.s, z0.s
        ret

The attached patch fixes that by recognizing the following pattern in
match.pd:
v1 = VEC_PERM_EXPR (v0, v0, mask)
v2 = VEC_PERM_EXPR (v1, v1, mask)
-->
v2 = v0
if mask is { nelts - 1, nelts - 2, nelts - 3, ... }

Code-gen with patch:
f:
        ret

Bootstrap and testing pass on aarch64-linux-gnu, and an SVE bootstrap is in progress.
Does it look OK for stage 1?

Thanks,
Prathamesh
gcc/ChangeLog:
        * match.pd: New pattern to simplify two successive VEC_PERM_EXPRs with
        single operand and same mask, where mask chooses elements in reverse
        order.

gcc/testsuite/ChangeLog:
        * gcc.target/aarch64/sve/acle/general/rev-1.c: New test.

diff --git a/gcc/match.pd b/gcc/match.pd
index b8d3538b809..19dfc8f3722 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8456,3 +8456,27 @@ and,
       }
       (if (full_perm_p)
        (vec_perm (op@3 @0 @1) @3 @2))))))
+
+/* Reversing a vector twice gives back the original vector, so fold
+     v1 = VEC_PERM_EXPR (v0, v0, mask)
+     v2 = VEC_PERM_EXPR (v1, v1, mask)
+   into v2 = v0 when mask is the constant selector
+   {nelts - 1, nelts - 2, ...}.  The mask is tested with series_p
+   because nelts is a poly_uint64 and may not be a known constant.  */
+
+(simplify
+ (vec_perm (vec_perm@2 @0 @0 VECTOR_CST@1) @2 @1)
+  (with
+   {
+    vec_perm_builder builder;
+    bool rev_p = false;
+    if (tree_to_vec_perm_builder (&builder, @1))
+      {
+       poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
+       vec_perm_indices sel (builder, 1, nelts);
+       if (sel.series_p (0, 1, nelts - 1, -1))
+         rev_p = true;
+      }
+   }
+   (if (rev_p)
+    @0)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c
new file mode 100644
index 00000000000..e57ee67d716
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+#include <arm_sve.h>
+
+svint32_t f(svint32_t v)
+{
+  return svrev_s32 (svrev_s32 (v));  /* Double reverse; expect V.  */
+}
+
+/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */

Reply via email to