This implements patterns that combine a vector element insertion of a vector element extraction into a VEC_PERM_EXPR of both vectors when supported. In addition, it adds the more generic identity transform of inserting a piece of an object into itself at the same position.
Richard - is there anything I can do to make this SVE aware? I'd need to construct an identity permute and "insert" into that permute the element from the other (or the same) vector. I suppose that won't work for most element positions, but at least inserting at [0] should? I'm mostly struggling with how to use vec_perm_builder here when nelts is not constant. Since it's derived from vec<>, can I simply start with a single pattern with 1 stride and then insert by using []? Bootstrap / regtest running on x86_64-unknown-linux-gnu. Thanks, Richard. 2020-05-07 Richard Biener <rguent...@suse.de> PR tree-optimization/94865 * match.pd ((bit_insert @0 (BIT_FIELD_REF @0 ...) ...) -> @0): New simplification. ((bit_insert @0 (BIT_FIELD_REF @1 ...) ...) -> (vec_perm @0 @1 ..): Likewise. * gcc.dg/tree-ssa/forwprop-39.c: New testcase. * gcc.dg/tree-ssa/forwprop-40.c: Likewise. --- gcc/match.pd | 42 +++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c | 21 +++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c | 18 +++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c diff --git a/gcc/match.pd b/gcc/match.pd index 9259dd4ddaa..4ce728d78c8 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5838,6 +5838,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) wi::to_wide (@ipos) + isize)) (BIT_FIELD_REF @0 @rsize @rpos))))) +/* Optimize a bit-insertion of a bit-extraction from the same object, + position and size. */ +(simplify + (bit_insert @0 (BIT_FIELD_REF@1 @0 @size @pos) @pos) + @0) + +/* Optimize an element insertion into a vector that is extracted from + another vector to a permutation of both vectors. 
*/ +(simplify + (bit_insert @0 (BIT_FIELD_REF@1 @2 @size @rpos) @ipos) + (if (VECTOR_TYPE_P (type) + && types_match (type, TREE_TYPE (@2)) + && single_use (@1)) + (with + { + unsigned HOST_WIDE_INT nelts; + unsigned int elem, at, elemsz; + } + (if (TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts) + && tree_fits_uhwi_p (@size) + && constant_multiple_p (tree_to_poly_uint64 (@rpos), + tree_to_poly_uint64 (@size), &elem) + && constant_multiple_p (tree_to_poly_uint64 (@ipos), + tree_to_poly_uint64 (@size), &at) + && constant_multiple_p (tree_to_poly_uint64 (@size), + tree_to_poly_uint64 + (TYPE_SIZE (TREE_TYPE (type))), &elemsz)) + (with + { + vec_perm_builder sel (nelts, nelts, 1); + for (unsigned i = 0; i < nelts; ++i) + sel.quick_push (i / elemsz == at + ? nelts + elem * elemsz + i % elemsz : i); + vec_perm_indices indices (sel, @0 == @2 ? 1 : 2, nelts); + } + (if (can_vec_perm_const_p (TYPE_MODE (TREE_TYPE (@0)), indices)) + (vec_perm @0 @2 { vec_perm_indices_to_tree + (build_vector_type + (build_nonstandard_integer_type + (tree_to_uhwi (@size), 1), + nelts), indices); }))))))) + (if (canonicalize_math_after_vectorization_p ()) (for fmas (FMA) (simplify diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c new file mode 100644 index 00000000000..f0212c373e2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-39.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-forwprop1 -fdump-tree-cddce1 -Wno-psabi -w" } */ + +typedef double v2df __attribute__((vector_size(16))); + +v2df move_sd(v2df a, v2df b) +{ + v2df result = a; + result[1] = b[1]; + return result; +} + +v2df move_nnop(v2df a) +{ + v2df result = a; + result[1] = a[1]; + return result; +} + +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "cddce1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c 
b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c new file mode 100644 index 00000000000..94329437f0d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-40.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fgimple -fdump-tree-forwprop1 -w -Wno-psabi" } */ + +typedef int v4si __attribute__((vector_size(16))); +typedef int v2si __attribute__((vector_size(8))); + +v4si __GIMPLE(ssa) bar (v4si a) +{ + v2si el; + v4si res; + + __BB(2): + el_2 = __BIT_FIELD_REF <v2si> (a_1(D), 64u, 64u); + res_3 = __BIT_INSERT (a_1(D), el_2, 0u); + return res_3; +} + +/* { dg-final { scan-tree-dump "VEC_PERM" "forwprop1" } } */ -- 2.13.7