On Mon, Aug 15, 2016 at 11:48 AM, Alan Hayward <alan.hayw...@arm.com> wrote: > The testcase pr71752.c was failing because the SLP code was generating an > SLP > vector using arguments from the SLP scalar stmts, but was using the wrong > argument number. > > vect_get_slp_defs() is given a vector of operands. When calling down to > vect_get_constant_vectors it uses i as op_num - making the assumption that > the > first op in the vector refers to the first argument in the SLP scalar > statement, the second op refers to the second arg and so on. > > However, previously in vectorizable_reduction, the call to > vect_get_vec_defs > destroyed this ordering by potentially only passing op1. > > The solution is in vectorizable_reduction to create a vector of operands > equal > in size to the number of arguments in the SLP statements. We maintain the > argument ordering and if we don't require defs for that argument we instead > push NULL into the vector. In vect_get_slp_defs we need to handle cases > where > an op might be NULL. > > Tested with a check run on X86 and AArch64. > Ok to commit? >
Ugh. Note the logic in vect_get_slp_defs is incredibly fragile - I think you can't simply "skip" ops the way you do as you need to still increment child_index accordingly for ignored ops. Why not let the function compute defs for all ops? That said, the vectorizable_reduction part certainly is fixing a bug (I think I've seen similar issues elsewhere though). Richard. > Changelog: > > gcc/ > * tree-vect-loop.c (vectorizable_reduction): Keep SLP operand > ordering. > * tree-vect-slp.c (vect_get_slp_defs): Handle null operands. > > gcc/testsuite/ > * gcc.dg/vect/pr71752.c: New. > > > > Thanks, > Alan. > > > > > diff --git a/gcc/testsuite/gcc.dg/vect/pr71752.c > b/gcc/testsuite/gcc.dg/vect/pr71752.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..8d26754b4fedf8b104caae8742a445dff > bf23f0a > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr71752.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > + > +unsigned int q4, yg; > + > +unsigned int > +w6 (unsigned int z5, unsigned int jv) > +{ > + unsigned int *f2 = &jv; > + > + while (*f2 < 21) > + { > + q4 -= jv; > + z5 -= jv; > + f2 = &yg; > + ++(*f2); > + } > + return z5; > +} > + > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index > 2a7e0c6661bc1ba82c9f03720e550749f2252a7c..826481af3d1d8b29bcdbd7d81c0fd5a85 > 9ecd9b0 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -5364,7 +5364,7 @@ vectorizable_reduction (gimple *stmt, > gimple_stmt_iterator *gsi, > auto_vec<tree> vect_defs; > auto_vec<gimple *> phis; > int vec_num; > - tree def0, def1, tem, op0, op1 = NULL_TREE; > + tree def0, def1, tem, op1 = NULL_TREE; > bool first_p = true; > tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE; > gimple *cond_expr_induction_def_stmt = NULL; > @@ -5964,29 +5964,36 @@ vectorizable_reduction (gimple *stmt, > gimple_stmt_iterator *gsi, > /* Handle uses. */ > if (j == 0) > { > - op0 = ops[!reduc_index]; > - if (op_type == ternary_op) > - { > - if (reduc_index == 0) > - op1 = ops[2]; > - else > - op1 = ops[1]; > - } > + if (slp_node) > + { > + /* Get vec defs for all the operands except the reduction index, > + ensuring the ordering of the ops in the vector is kept. */ > + auto_vec<tree, 3> slp_ops; > + auto_vec<vec<tree>, 3> vec_defs; > > - if (slp_node) > - vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, > - slp_node, -1); > + slp_ops.quick_push ((reduc_index == 0) ? NULL : ops[0]); > + slp_ops.quick_push ((reduc_index == 1) ? NULL : ops[1]); > + if (op_type == ternary_op) > + slp_ops.quick_push ((reduc_index == 2) ? NULL : ops[2]); > + > + vect_get_slp_defs (slp_ops, slp_node, &vec_defs, -1); > + > + vec_oprnds0.safe_splice (vec_defs[(reduc_index == 0) ? 1 : 0]); > + if (op_type == ternary_op) > + vec_oprnds1.safe_splice (vec_defs[(reduc_index == 2) ? 1 : > 2]); > + } > else > - { > + { > loop_vec_def0 = vect_get_vec_def_for_operand > (ops[!reduc_index], > stmt); > vec_oprnds0.quick_push (loop_vec_def0); > if (op_type == ternary_op) > { > + op1 = (reduc_index == 0) ? ops[2] : ops[1]; > loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt); > vec_oprnds1.quick_push (loop_vec_def1); > } > - } > + } > } > else > { > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c > index > fb325d54f1084461d44cd54a98e5b7f99541a188..7c480d59c823b5258255c8be047f050c8 > 3cc91fd 100644 > --- a/gcc/tree-vect-slp.c > +++ b/gcc/tree-vect-slp.c > @@ -3200,10 +3200,19 @@ vect_get_slp_defs (vec<tree> ops, slp_tree > slp_node, > vec<tree> vec_defs; > tree oprnd; > bool vectorized_defs; > + bool first_iteration = true; > > first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; > FOR_EACH_VEC_ELT (ops, i, oprnd) > { > + if (oprnd == NULL) > + { > + vec_defs = vNULL; > + vec_defs.create (0); > + vec_oprnds->quick_push (vec_defs); > + continue; > + } > + > /* For each operand we check if it has vectorized definitions in a > child > node or we need to create them (for invariants and constants). We > check if the LHS of the first stmt of the next child matches OPRND. > @@ -3240,7 +3249,7 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node, > > if (!vectorized_defs) > { > - if (i == 0) > + if (first_iteration) > { > number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); > /* Number of vector stmts was calculated according to LHS in > @@ -3276,6 +3285,8 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node, > /* For reductions, we only need initial values. */ > if (reduc_index != -1) > return; > + > + first_iteration = false; > } > } > >