The following patch makes sure pattern detection also works with a "non-canonical" order of operands in reduction statements.
Bootstrapped and tested on x86_64-unknown-linux-gnu, verified it fixes
the powerpc FAIL, applied.

Richard.

2019-09-20  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/91821
        * tree-vect-loop.c (check_reduction_path): Check we can compute
        reduc_idx.
        (vect_is_simple_reduction): Set STMT_VINFO_REDUC_IDX.
        * tree-vect-patterns.c (vect_reassociating_reduction_p): Return
        operands in canonical order.
        * tree-vectorizer.c (vec_info::new_stmt_vec_info): Initialize
        STMT_VINFO_REDUC_IDX.
        * tree-vectorizer.h (_stmt_vec_info::reduc_idx): New.
        (STMT_VINFO_REDUC_IDX): Likewise.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c (revision 275988)
+++ gcc/tree-vect-loop.c (working copy)
@@ -2658,7 +2658,13 @@ pop:
           gimple *use_stmt = USE_STMT (path[i].second);
           tree op = USE_FROM_PTR (path[i].second);
           if (! has_single_use (op)
-              || ! is_gimple_assign (use_stmt))
+              || ! is_gimple_assign (use_stmt)
+              /* The following make sure we can compute the operand index
+                 easily plus it mostly disallows chaining via COND_EXPR condition
+                 operands.  */
+              || (gimple_assign_rhs1 (use_stmt) != op
+                  && gimple_assign_rhs2 (use_stmt) != op
+                  && gimple_assign_rhs3 (use_stmt) != op))
             {
               fail = true;
               break;
@@ -3058,6 +3064,7 @@ vect_is_simple_reduction (loop_vec_info
           || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
           || vect_valid_reduction_input_p (def1_info)))
     {
+      STMT_VINFO_REDUC_IDX (def_stmt_info) = 1;
       if (dump_enabled_p ())
         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
       return def_stmt_info;
@@ -3070,6 +3077,7 @@ vect_is_simple_reduction (loop_vec_info
           || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
           || vect_valid_reduction_input_p (def2_info)))
     {
+      STMT_VINFO_REDUC_IDX (def_stmt_info) = 0;
       if (dump_enabled_p ())
         report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
       return def_stmt_info;
@@ -3084,16 +3092,18 @@ vect_is_simple_reduction (loop_vec_info
      restriction is that all operations in the chain are the same.  */
   auto_vec<stmt_vec_info, 8> reduc_chain;
   unsigned i;
+  bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR;
   for (i = path.length () - 1; i >= 1; --i)
     {
       gimple *stmt = USE_STMT (path[i].second);
       if (gimple_assign_rhs_code (stmt) != code)
-        break;
-      reduc_chain.safe_push (loop_info->lookup_stmt (stmt));
+        is_slp_reduc = false;
+      stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt);
+      STMT_VINFO_REDUC_IDX (stmt_info)
+        = path[i].second->use - gimple_assign_rhs1_ptr (stmt);
+      reduc_chain.safe_push (stmt_info);
     }
-  if (i == 0
-      && ! nested_in_vect_loop
-      && code != COND_EXPR)
+  if (is_slp_reduc)
     {
       for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
         {
Index: gcc/tree-vect-patterns.c
===================================================================
--- gcc/tree-vect-patterns.c (revision 275988)
+++ gcc/tree-vect-patterns.c (working copy)
@@ -868,6 +868,8 @@ vect_reassociating_reduction_p (stmt_vec

   *op0_out = gimple_assign_rhs1 (assign);
   *op1_out = gimple_assign_rhs2 (assign);
+  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
+    std::swap (*op0_out, *op1_out);
   return true;
 }

Index: gcc/tree-vectorizer.c
===================================================================
--- gcc/tree-vectorizer.c (revision 275988)
+++ gcc/tree-vectorizer.c (working copy)
@@ -639,6 +639,7 @@ vec_info::new_stmt_vec_info (gimple *stm
   STMT_VINFO_VECTORIZABLE (res) = true;
   STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
   STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
+  STMT_VINFO_REDUC_IDX (res) = -1;
   STMT_VINFO_SLP_VECT_ONLY (res) = false;

   if (gimple_code (stmt) == GIMPLE_PHI
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h (revision 275988)
+++ gcc/tree-vectorizer.h (working copy)
@@ -941,6 +941,10 @@ public:
      vect_force_simple_reduction.  */
   enum vect_reduction_type reduc_type;

+  /* On a stmt participating in the reduction the index of the operand
+     on the reduction SSA cycle.  */
+  int reduc_idx;
+
   /* On a reduction PHI the def returned by vect_force_simple_reduction.
      On the def returned by vect_force_simple_reduction the corresponding
      PHI.  */
@@ -1030,6 +1034,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_
 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
 #define STMT_VINFO_VEC_REDUCTION_TYPE(S) (S)->v_reduc_type
 #define STMT_VINFO_VEC_CONST_COND_REDUC_CODE(S) (S)->const_cond_reduc_code
+#define STMT_VINFO_REDUC_IDX(S) (S)->reduc_idx

 #define STMT_VINFO_DR_WRT_VEC_LOOP(S) (S)->dr_wrt_vec_loop
 #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_wrt_vec_loop.base_address