This implements a pattern that turns the insertion of an element
extracted from another vector into a VEC_PERM_EXPR of both vectors
when the target supports that permutation.  It also adds the more
generic identity transform for inserting a piece of an object into
itself at the same position.

Richard - is there anything I can do to make this SVE aware?
I'd need to construct an identity permute and then "insert" into
that permute the element from the other (or same) vector.
I suppose that won't work for most element positions, but
inserting at [0] at least should?  I'm mostly struggling with
how to use vec_perm_builder here when nelts is not constant.
Since it's derived from vec<>, can I simply start with a single
pattern with a stride of 1 and then insert by using []?

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Thanks,
Richard.

2020-05-07  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/94865
        * match.pd ((bit_insert @0 (BIT_FIELD_REF @0 ...) ...) -> @0):
        New simplification.
        ((bit_insert @0 (BIT_FIELD_REF @1 ...) ...) -> (vec_perm @0 @1 ...)):
        Likewise.

        * gcc.dg/tree-ssa/forwprop-39.c: New testcase.
        * gcc.dg/tree-ssa/forwprop-40.c: Likewise.
---
 gcc/match.pd                                | 42 +++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c | 21 +++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c | 18 +++++++++++++
 3 files changed, 81 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 9259dd4ddaa..4ce728d78c8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5838,6 +5838,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
                      wi::to_wide (@ipos) + isize))
     (BIT_FIELD_REF @0 @rsize @rpos)))))
 
+/* Optimize a bit-insertion of a bit-extraction from the same object,
+   position and size.  */
+(simplify
+ (bit_insert @0 (BIT_FIELD_REF@1 @0 @size @pos) @pos)
+ @0)
+
+/* Optimize an element insertion into a vector that is extracted from
+   another vector to a permutation of both vectors.  */
+(simplify
+ (bit_insert @0 (BIT_FIELD_REF@1 @2 @size @rpos) @ipos)
+ (if (VECTOR_TYPE_P (type)
+      && types_match (type, TREE_TYPE (@2))
+      && single_use (@1))
+  (with
+   {
+     unsigned HOST_WIDE_INT nelts;
+     unsigned int elem, at, elemsz;
+   }
+   (if (TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
+       && tree_fits_uhwi_p (@size)
+       && constant_multiple_p (tree_to_poly_uint64 (@rpos),
+                               tree_to_poly_uint64 (@size), &elem)
+       && constant_multiple_p (tree_to_poly_uint64 (@ipos),
+                               tree_to_poly_uint64 (@size), &at)
+       && constant_multiple_p (tree_to_poly_uint64 (@size),
+                               tree_to_poly_uint64
+                                 (TYPE_SIZE (TREE_TYPE (type))), &elemsz))
+    (with
+     {
+       vec_perm_builder sel (nelts, nelts, 1);
+       for (unsigned i = 0; i < nelts; ++i)
+         sel.quick_push (i / elemsz == at
+                        ? nelts + elem * elemsz + i % elemsz : i);
+       vec_perm_indices indices (sel, @0 == @2 ? 1 : 2, nelts);
+     }
+     (if (can_vec_perm_const_p (TYPE_MODE (TREE_TYPE (@0)), indices))
+      (vec_perm @0 @2 { vec_perm_indices_to_tree
+                         (build_vector_type
+                           (build_nonstandard_integer_type
+                             (tree_to_uhwi (@size), 1),
+                            nelts), indices); })))))))
+
 (if (canonicalize_math_after_vectorization_p ())
  (for fmas (FMA)
   (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c
new file mode 100644
index 00000000000..f0212c373e2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1 -fdump-tree-cddce1 -Wno-psabi -w" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+
+v2df move_sd(v2df a, v2df b)
+{
+  v2df result = a;
+  result[1] = b[1];
+  return result;
+}
+
+v2df move_nnop(v2df a)
+{
+  v2df result = a;
+  result[1] = a[1];
+  return result;
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "cddce1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c
new file mode 100644
index 00000000000..94329437f0d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fgimple -fdump-tree-forwprop1 -w -Wno-psabi" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v2si __attribute__((vector_size(8)));
+
+v4si __GIMPLE(ssa) bar (v4si a)
+{
+  v2si el;
+  v4si res;
+
+  __BB(2):
+  el_2 = __BIT_FIELD_REF <v2si> (a_1(D), 64u, 64u);
+  res_3 = __BIT_INSERT (a_1(D), el_2, 0u);
+  return res_3;
+}
+
+/* { dg-final { scan-tree-dump "VEC_PERM" "forwprop1" } } */
-- 
2.13.7

Reply via email to