The following adds simplification of two VEC_PERM_EXPRs where
the later one replaces all elements from either the first or the
second input of the earlier permute.  This allows a three input
permute to be simplified to a two input one.

I'm following the existing two input simplification case and only
allow non-VLA permutes.  The now existing three cases and the
single case in tree-ssa-forwprop.cc somehow ask for merging,
I'm not doing this as part of this change though.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        PR tree-optimization/111228
        * match.pd ((vec_perm (vec_perm ..) @5 ..) -> (vec_perm @x @5 ..)):
        New simplifications.

        * gcc.dg/tree-ssa/forwprop-42.c: New testcase.
---
 gcc/match.pd                                | 141 +++++++++++++++++++-
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c |  17 +++
 2 files changed, 155 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 47d2733211a..6a7edde5736 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8993,10 +8993,10 @@ and,
 
 
 /* Merge
-   c = VEC_PERM_EXPR <a, b, VCST0>;
-   d = VEC_PERM_EXPR <c, c, VCST1>;
+     c = VEC_PERM_EXPR <a, b, VCST0>;
+     d = VEC_PERM_EXPR <c, c, VCST1>;
    to
-   d = VEC_PERM_EXPR <a, b, NEW_VCST>;  */
+     d = VEC_PERM_EXPR <a, b, NEW_VCST>;  */
 
 (simplify
  (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
@@ -9038,6 +9038,141 @@ and,
      (if (op0)
       (vec_perm @1 @2 { op0; })))))))
 
+/* Merge
+     c = VEC_PERM_EXPR <a, b, VCST0>;
+     d = VEC_PERM_EXPR <x, c, VCST1>;
+   to
+     d = VEC_PERM_EXPR <x, {a,b}, NEW_VCST>;
+   when all elements from a or b are replaced by the later
+   permutation.  */
+
+(simplify
+ (vec_perm @5 (vec_perm@0 @1 @2 VECTOR_CST@3) VECTOR_CST@4)
+ (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
+  (with
+   {
+     machine_mode result_mode = TYPE_MODE (type);
+     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
+     int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+     vec_perm_builder builder0;
+     vec_perm_builder builder1;
+     vec_perm_builder builder2 (nelts, nelts, 2);
+   }
+   (if (tree_to_vec_perm_builder (&builder0, @3)
+       && tree_to_vec_perm_builder (&builder1, @4))
+    (with
+     {
+       vec_perm_indices sel0 (builder0, 2, nelts);
+       vec_perm_indices sel1 (builder1, 2, nelts);
+       bool use_1 = false, use_2 = false;
+
+       for (int i = 0; i < nelts; i++)
+         {
+          if (known_lt ((poly_uint64)sel1[i], sel1.nelts_per_input ()))
+            builder2.quick_push (sel1[i]);
+          else
+            {
+              poly_uint64 j = sel0[(sel1[i] - sel1.nelts_per_input ())
+                                   .to_constant ()];
+              if (known_lt (j, sel0.nelts_per_input ()))
+                use_1 = true;
+              else
+                {
+                  use_2 = true;
+                  j -= sel0.nelts_per_input ();
+                }
+              builder2.quick_push (j + sel1.nelts_per_input ());
+            }
+        }
+     }
+     (if (use_1 ^ use_2)
+      (with
+       {
+        vec_perm_indices sel2 (builder2, 2, nelts);
+        tree op0 = NULL_TREE;
+        /* If the new VEC_PERM_EXPR can't be handled but both
+           original VEC_PERM_EXPRs can, punt.
+           If one or both of the original VEC_PERM_EXPRs can't be
+           handled and the new one can't be either, don't increase
+           number of VEC_PERM_EXPRs that can't be handled.  */
+        if (can_vec_perm_const_p (result_mode, op_mode, sel2, false)
+            || (single_use (@0)
+                ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
+                   || !can_vec_perm_const_p (result_mode, op_mode, sel1, 
false))
+                : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
+          op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
+       }
+       (if (op0)
+       (switch
+        (if (use_1)
+         (vec_perm @5 @1 { op0; }))
+        (if (use_2)
+         (vec_perm @5 @2 { op0; })))))))))))
+
+/* And the case with swapped outer permute sources.  */
+
+(simplify
+ (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @5 VECTOR_CST@4)
+ (if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
+  (with
+   {
+     machine_mode result_mode = TYPE_MODE (type);
+     machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
+     int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+     vec_perm_builder builder0;
+     vec_perm_builder builder1;
+     vec_perm_builder builder2 (nelts, nelts, 2);
+   }
+   (if (tree_to_vec_perm_builder (&builder0, @3)
+       && tree_to_vec_perm_builder (&builder1, @4))
+    (with
+     {
+       vec_perm_indices sel0 (builder0, 2, nelts);
+       vec_perm_indices sel1 (builder1, 2, nelts);
+       bool use_1 = false, use_2 = false;
+
+       for (int i = 0; i < nelts; i++)
+         {
+          if (known_ge ((poly_uint64)sel1[i], sel1.nelts_per_input ()))
+            builder2.quick_push (sel1[i]);
+          else
+            {
+              poly_uint64 j = sel0[sel1[i].to_constant ()];
+              if (known_lt (j, sel0.nelts_per_input ()))
+                use_1 = true;
+              else
+                {
+                  use_2 = true;
+                  j -= sel0.nelts_per_input ();
+                }
+              builder2.quick_push (j);
+            }
+        }
+     }
+     (if (use_1 ^ use_2)
+      (with
+       {
+        vec_perm_indices sel2 (builder2, 2, nelts);
+        tree op0 = NULL_TREE;
+        /* If the new VEC_PERM_EXPR can't be handled but both
+           original VEC_PERM_EXPRs can, punt.
+           If one or both of the original VEC_PERM_EXPRs can't be
+           handled and the new one can't be either, don't increase
+           number of VEC_PERM_EXPRs that can't be handled.  */
+        if (can_vec_perm_const_p (result_mode, op_mode, sel2, false)
+            || (single_use (@0)
+                ? (!can_vec_perm_const_p (result_mode, op_mode, sel0, false)
+                   || !can_vec_perm_const_p (result_mode, op_mode, sel1, 
false))
+                : !can_vec_perm_const_p (result_mode, op_mode, sel1, false)))
+          op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
+       }
+       (if (op0)
+       (switch
+        (if (use_1)
+         (vec_perm @1 @5 { op0; }))
+        (if (use_2)
+         (vec_perm @2 @5 { op0; })))))))))))
+
 
 /* Match count trailing zeroes for simplify_count_trailing_zeroes in fwprop.
    The canonical form is array[((x & -x) * C) >> SHIFT] where C is a magic
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c 
b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
new file mode 100644
index 00000000000..f3dbc3e9394
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-42.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-cddce1" } */
+
+typedef unsigned long v2di __attribute__((vector_size(16)));
+
+v2di g;
+void test (v2di *v)
+{
+  v2di lo = v[0];
+  v2di hi = v[1];
+  v2di res;
+  res[1] = hi[1];
+  res[0] = lo[0];
+  g = res;
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR <\[^>\]*, { 0, 3 }>" 1 
"cddce1" } } */
-- 
2.35.3

Reply via email to