When we do SLP discovery of a .MASK_LOAD for a dataref group with gaps, the discovery for the mask will have gaps as well, and this was unexpected in a few places. The following re-organizes things slightly to accommodate this.
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. PR tree-optimization/117484 * tree-vect-slp.cc (vect_build_slp_tree_2): Handle gaps in mask discovery. Fix condition to release the load permutation. (vect_lower_load_permutations): Assert we get no load permutation for the unpermuted node. * tree-vect-slp-patterns.cc (linear_loads_p): Properly identify loads (without permutation). (compatible_complex_nodes_p): Likewise. * gcc.dg/vect/pr117484-1.c: New testcase. * gcc.dg/vect/pr117484-2.c: Likewise. --- gcc/testsuite/gcc.dg/vect/pr117484-1.c | 13 +++++++++++++ gcc/testsuite/gcc.dg/vect/pr117484-2.c | 16 ++++++++++++++++ gcc/tree-vect-slp-patterns.cc | 14 ++++++++++---- gcc/tree-vect-slp.cc | 22 +++++++++++++--------- 4 files changed, 52 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr117484-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr117484-2.c diff --git a/gcc/testsuite/gcc.dg/vect/pr117484-1.c b/gcc/testsuite/gcc.dg/vect/pr117484-1.c new file mode 100644 index 00000000000..453556c50f9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr117484-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ + +extern int a; +extern short b[]; +extern signed char c[], d[]; +int main() +{ + for (long j = 3; j < 1024; j += 3) + if (c[j] ? 
b[j] : 0) { + b[j] = d[j - 2]; + a = d[j]; + } +} diff --git a/gcc/testsuite/gcc.dg/vect/pr117484-2.c b/gcc/testsuite/gcc.dg/vect/pr117484-2.c new file mode 100644 index 00000000000..baffe7597ba --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr117484-2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ + +int a; +extern int d[]; +extern int b[]; +extern _Bool c[]; +extern char h[]; +int main() +{ + for (int i = 0; i < 1024; i += 4) + if (h[i] || c[i]) + { + a = d[i]; + b[i] = d[i - 3]; + } +} diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc index 8adae8a6ec0..d62682be43c 100644 --- a/gcc/tree-vect-slp-patterns.cc +++ b/gcc/tree-vect-slp-patterns.cc @@ -221,9 +221,15 @@ linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root) perm_cache->put (root, retval); /* If it's a load node, then just read the load permute. */ - if (SLP_TREE_LOAD_PERMUTATION (root).exists ()) + if (SLP_TREE_DEF_TYPE (root) == vect_internal_def + && SLP_TREE_CODE (root) != VEC_PERM_EXPR + && STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root)) + && DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root)))) { - retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root)); + if (SLP_TREE_LOAD_PERMUTATION (root).exists ()) + retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root)); + else + retval = PERM_EVENODD; perm_cache->put (root, retval); return retval; } @@ -798,8 +804,8 @@ compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache, return false; } - if (!SLP_TREE_LOAD_PERMUTATION (a).exists () - || !SLP_TREE_LOAD_PERMUTATION (b).exists ()) + if (!STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a)) + || !STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b))) { for (unsigned i = 0; i < gimple_num_args (a_stmt); i++) { diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index ffe9e718575..8e4ad05e2a4 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -2004,14 +2004,15 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, = 
STMT_VINFO_GROUPED_ACCESS (stmt_info) ? DR_GROUP_FIRST_ELEMENT (stmt_info) : stmt_info; bool any_permute = false; - bool any_null = false; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info) { int load_place; if (! load_info) { - load_place = j; - any_null = true; + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + load_place = j; + else + load_place = 0; } else if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) load_place = vect_get_place_in_interleaving_chain @@ -2022,11 +2023,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, any_permute |= load_place != j; load_permutation.quick_push (load_place); } - if (any_null) - { - gcc_assert (!any_permute); - load_permutation.release (); - } if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) { @@ -2081,6 +2077,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, followed by 'node' being the desired final permutation. */ if (unperm_load) { + gcc_assert + (!SLP_TREE_LOAD_PERMUTATION (unperm_load).exists ()); lane_permutation_t lperm; lperm.create (group_size); for (unsigned j = 0; j < load_permutation.length (); ++j) @@ -2101,6 +2099,10 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, } else { + if (!any_permute + && STMT_VINFO_GROUPED_ACCESS (stmt_info) + && group_size == DR_GROUP_SIZE (first_stmt_info)) + load_permutation.release (); SLP_TREE_LOAD_PERMUTATION (node) = load_permutation; return node; } @@ -2675,7 +2677,8 @@ out: tree op0; tree uniform_val = op0 = oprnd_info->ops[0]; for (j = 1; j < oprnd_info->ops.length (); ++j) - if (!operand_equal_p (uniform_val, oprnd_info->ops[j])) + if (oprnd_info->ops[j] + && !operand_equal_p (uniform_val, oprnd_info->ops[j])) { uniform_val = NULL_TREE; break; @@ -4510,6 +4513,7 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo, group_lanes, &max_nunits, matches, &limit, &tree_size, bst_map); + gcc_assert (!SLP_TREE_LOAD_PERMUTATION (l0).exists ()); if (ld_lanes_lanes != 0) { -- 2.43.0