On Thu, 8 Jun 2017, Richard Biener wrote:

> 
> The following fixes unsafe vectorization of reductions in outer loop
> vectorization.  It combines it with a bit of cleanup in the affected
> function.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
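For readers without the PR at hand, a minimal sketch of the affected pattern
(modeled on the foo function in the new pr66623.c testcase below; this sketch
and its function name are illustrative only, not part of the patch):

/* Sketch only: a float product reduction carried across the outer loop.
   Outer-loop vectorization reorders the multiplications, which is only a
   valid transform for IEEE float under -ffast-math, so without it the
   reduction must not be vectorized.  */
float
outer_reduction (float *__restrict__ x)
{
  float l = 1.0f;
  for (int a = 0; a < 4; a++)	/* outer loop, vectorization candidate */
    for (int b = 0; b < 4; b++)	/* inner loop */
      l = l * x[b];		/* order-sensitive FP reduction */
  return l;
}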
Re-testing the following variant -- multiple loop-closed PHI nodes for
the same SSA name happen.

Richard.

2017-06-08  Richard Biener  <rguent...@suse.de>

	PR tree-optimization/66623
	* tree-vect-loop.c (vect_is_simple_reduction): Cleanup,
	refactor check_reduction into two parts, properly computing
	whether we have to check reduction validity for outer loop
	vectorization.

	* gcc.dg/vect/pr66623.c: New testcase.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 249007)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -2727,8 +2727,6 @@ vect_is_simple_reduction (loop_vec_info
 {
   struct loop *loop = (gimple_bb (phi))->loop_father;
   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
-  edge latch_e = loop_latch_edge (loop);
-  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
   gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL;
   enum tree_code orig_code, code;
   tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
@@ -2742,11 +2740,6 @@ vect_is_simple_reduction (loop_vec_info
   *double_reduc = false;
   *v_reduc_type = TREE_CODE_REDUCTION;

-  /* Check validity of the reduction only for the innermost loop.  */
-  bool check_reduction = ! flow_loop_nested_p (vect_loop, loop);
-  gcc_assert ((check_reduction && loop == vect_loop)
-	      || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
-
   name = PHI_RESULT (phi);
   /* ??? If there are no uses of the PHI result the inner loop reduction
      won't be detected as possibly double-reduction by vectorizable_reduction
@@ -2775,13 +2768,15 @@ vect_is_simple_reduction (loop_vec_info
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "reduction used in loop.\n");
+			     "reduction value used in loop.\n");
 	  return NULL;
 	}

       phi_use_stmt = use_stmt;
     }

+  edge latch_e = loop_latch_edge (loop);
+  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
   if (TREE_CODE (loop_arg) != SSA_NAME)
     {
       if (dump_enabled_p ())
@@ -2795,18 +2790,22 @@ vect_is_simple_reduction (loop_vec_info
     }

   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
-  if (!def_stmt)
+  if (gimple_nop_p (def_stmt))
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "reduction: no def_stmt.\n");
+			 "reduction: no def_stmt\n");
       return NULL;
     }

   if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
     {
       if (dump_enabled_p ())
-	dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
+	{
+	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			   "reduction: unhandled reduction operation: ");
+	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
+	}
       return NULL;
     }

@@ -2822,6 +2821,7 @@ vect_is_simple_reduction (loop_vec_info
     }

   nloop_uses = 0;
+  auto_vec<gphi *, 3> lcphis;
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
     {
       gimple *use_stmt = USE_STMT (use_p);
@@ -2829,6 +2829,9 @@ vect_is_simple_reduction (loop_vec_info
 	continue;
       if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
 	nloop_uses++;
+      else
+	/* We can have more than one loop-closed PHI.  */
+	lcphis.safe_push (as_a <gphi *> (use_stmt));
       if (nloop_uses > 1)
 	{
 	  if (dump_enabled_p ())
@@ -2873,6 +2876,27 @@ vect_is_simple_reduction (loop_vec_info
       return NULL;
     }

+  /* If we are vectorizing an inner reduction we are executing that
+     in the original order only in case we are not dealing with a
+     double reduction.  */
+  bool check_reduction = true;
+  if (flow_loop_nested_p (vect_loop, loop))
+    {
+      gphi *lcphi;
+      unsigned i;
+      check_reduction = false;
+      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
+	FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
+	  {
+	    gimple *use_stmt = USE_STMT (use_p);
+	    if (is_gimple_debug (use_stmt))
+	      continue;
+	    if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
+	      check_reduction = true;
+	  }
+    }
+
+  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
   code = orig_code = gimple_assign_rhs_code (def_stmt);

   /* We can handle "res -= x[i]", which is non-associative by
@@ -2887,27 +2911,8 @@ vect_is_simple_reduction (loop_vec_info

   if (code == COND_EXPR)
     {
-      if (check_reduction)
+      if (! nested_in_vect_loop)
 	*v_reduc_type = COND_REDUCTION;
-    }
-  else if (!commutative_tree_code (code) || !associative_tree_code (code))
-    {
-      if (dump_enabled_p ())
-	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			"reduction: not commutative/associative: ");
-      return NULL;
-    }
-
-  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
-    {
-      if (code != COND_EXPR)
-	{
-	  if (dump_enabled_p ())
-	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: not binary operation: ");
-
-	  return NULL;
-	}

       op3 = gimple_assign_rhs1 (def_stmt);
       if (COMPARISON_CLASS_P (op3))
@@ -2918,30 +2923,35 @@ vect_is_simple_reduction (loop_vec_info

       op1 = gimple_assign_rhs2 (def_stmt);
       op2 = gimple_assign_rhs3 (def_stmt);
-
-      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
-	{
-	  if (dump_enabled_p ())
-	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: uses not ssa_names: ");
-
-	  return NULL;
-	}
     }
-  else
+  else if (!commutative_tree_code (code) || !associative_tree_code (code))
+    {
+      if (dump_enabled_p ())
+	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			"reduction: not commutative/associative: ");
+      return NULL;
+    }
+  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
     {
       op1 = gimple_assign_rhs1 (def_stmt);
       op2 = gimple_assign_rhs2 (def_stmt);
+    }
+  else
+    {
+      if (dump_enabled_p ())
+	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			"reduction: not handled operation: ");
+      return NULL;
+    }

-      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
-	{
-	  if (dump_enabled_p ())
-	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: uses not ssa_names: ");
+  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
+    {
+      if (dump_enabled_p ())
+	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			"reduction: both uses not ssa_names: ");

-	  return NULL;
-	}
-    }
+      return NULL;
+    }

   type = TREE_TYPE (gimple_assign_lhs (def_stmt));
   if ((TREE_CODE (op1) == SSA_NAME
@@ -3091,7 +3101,7 @@ vect_is_simple_reduction (loop_vec_info
		== vect_internal_def
	    && !is_loop_header_bb_p (gimple_bb (def2)))))))
     {
-      if (check_reduction && orig_code != MINUS_EXPR)
+      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
 	{
 	  /* Check if we can swap operands (just for simplicity - so that
 	     the rest of the code can assume that the reduction variable
@@ -3145,7 +3155,8 @@ vect_is_simple_reduction (loop_vec_info
     }

   /* Try to find SLP reduction chain.  */
-  if (check_reduction && code != COND_EXPR
+  if (! nested_in_vect_loop
+      && code != COND_EXPR
       && vect_is_slp_reduction (loop_info, phi, def_stmt))
     {
       if (dump_enabled_p ())
Index: gcc/testsuite/gcc.dg/vect/pr66623.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr66623.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vect/pr66623.c	(working copy)
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_float } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+#define OP *
+#define INIT 1.0
+
+float __attribute__((noinline,noclone))
+foo (float *__restrict__ i)
+{
+  float l = INIT;
+  int a;
+  int b;
+
+  for (a = 0; a < 4; a++)
+    for (b = 0; b < 4; b++)
+      l = l OP i[b];
+
+  return l;
+}
+
+float __attribute__((noinline,noclone))
+foo_ref (float *__restrict__ i)
+{
+  float l = INIT;
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  l = l OP i[0];
+  l = l OP i[1];
+  l = l OP i[2];
+  l = l OP i[3];
+
+  return l;
+}
+
+union u
+{
+  float f;
+  unsigned int u;
+};
+
+int
+main (void)
+{
+  union u res, res2;
+  float a[4];
+
+  if (sizeof (float) != sizeof (unsigned int))
+    return 0;
+
+  check_vect ();
+
+  a[0] = 0.01;
+  a[1] = 0.01;
+  a[2] = 0.01;
+  a[3] = 1.0;
+
+  res.f = foo_ref (a);
+
+  res2.f = foo (a);
+
+  if (res.u != res2.u)
+    abort ();
+
+  return 0;
+}
+
+/* need -ffast-math to vectorize this loop.  */
+/* ARM NEON passes -ffast-math to these tests, so expect this to fail.  */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail arm_neon_ok } } } */
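As a side note (not part of the patch): the reason main compares bit patterns
through the union is that reassociating the float product, roughly the way a
wrongly vectorized reduction with multiple accumulators would, can change the
rounding.  A standalone sketch of the two evaluation orders, with hypothetical
names, assuming the same inputs as the testcase:

#include <stdio.h>

/* Hypothetical illustration only: the product evaluated in source order
   versus with one partial product per outer iteration, combined at the
   end.  With IEEE float the two results need not be bit-identical, which
   is what pr66623.c detects via the union compare.  */
int
main (void)
{
  float x[4] = { 0.01f, 0.01f, 0.01f, 1.0f };

  /* Source order, as in foo/foo_ref above.  */
  float serial = 1.0f;
  for (int a = 0; a < 4; a++)
    for (int b = 0; b < 4; b++)
      serial = serial * x[b];

  /* Reassociated order: one partial product per outer iteration,
     multiplied together at the end.  */
  float partial[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
  for (int b = 0; b < 4; b++)
    for (int a = 0; a < 4; a++)
      partial[a] = partial[a] * x[b];
  float reassoc = ((partial[0] * partial[1]) * partial[2]) * partial[3];

  printf ("serial  = %a\nreassoc = %a\n", serial, reassoc);
  return 0;
}

Whether the two orders actually produce different bit patterns depends on the
input values; printing both with %a makes the comparison easy to inspect.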