This is part #2. It fixes wrong-code caused by a bogus reduction operand being used, and it factors out common code (which I'd otherwise need to duplicate once more...).
Bootstrap and regtest running on x86_64-unknown-linux-gnu. Richard. 2015-05-21 Richard Biener <rguent...@suse.de> * tree-vect-loop.c (get_reduction_op): New function. (vect_model_reduction_cost): Use it, add reduc_index parameter. Make ready for BB reductions. (vect_create_epilog_for_reduction): Use get_reduction_op. (vectorizable_reduction): Init reduc_index to a valid value. Adjust vect_model_reduction_cost call. * tree-vect-slp.c (vect_get_constant_vectors): Use the proper operand for reduction defaults. Add SAD_EXPR support. Assert we have a neutral op for SLP reductions. * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): When walking pattern stmt ops only recurse to SSA names. Index: gcc/tree-vect-loop.c =================================================================== *** gcc/tree-vect-loop.c (revision 223482) --- gcc/tree-vect-loop.c (working copy) *************** have_whole_vector_shift (enum machine_mo *** 3166,3171 **** --- 3166,3194 ---- return true; } + /* Return the reduction operand (with index REDUC_INDEX) of STMT. */ + + static tree + get_reduction_op (gimple stmt, int reduc_index) + { + switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) + { + case GIMPLE_SINGLE_RHS: + gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) + == ternary_op); + return TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index); + case GIMPLE_UNARY_RHS: + return gimple_assign_rhs1 (stmt); + case GIMPLE_BINARY_RHS: + return (reduc_index + ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt)); + case GIMPLE_TERNARY_RHS: + return gimple_op (stmt, reduc_index + 1); + default: + gcc_unreachable (); + } + } + /* TODO: Close dependency between vect_model_*_cost and vectorizable_* functions. Design better to avoid maintenance issues. */ *************** have_whole_vector_shift (enum machine_mo *** 3177,3183 **** static bool vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, ! 
int ncopies) { int prologue_cost = 0, epilogue_cost = 0; enum tree_code code; --- 3200,3206 ---- static bool vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, ! int ncopies, int reduc_index) { int prologue_cost = 0, epilogue_cost = 0; enum tree_code code; *************** vect_model_reduction_cost (stmt_vec_info *** 3187,3218 **** tree reduction_op; machine_mode mode; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ! struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); ! void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); /* Cost of reduction op inside loop. */ unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt, stmt_info, 0, vect_body); stmt = STMT_VINFO_STMT (stmt_info); ! switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) ! { ! case GIMPLE_SINGLE_RHS: ! gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op); ! reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2); ! break; ! case GIMPLE_UNARY_RHS: ! reduction_op = gimple_assign_rhs1 (stmt); ! break; ! case GIMPLE_BINARY_RHS: ! reduction_op = gimple_assign_rhs2 (stmt); ! break; ! case GIMPLE_TERNARY_RHS: ! reduction_op = gimple_assign_rhs3 (stmt); ! break; ! default: ! gcc_unreachable (); ! } vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); if (!vectype) --- 3210,3232 ---- tree reduction_op; machine_mode mode; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ! struct loop *loop = NULL; ! void *target_cost_data; ! ! if (loop_vinfo) ! { ! loop = LOOP_VINFO_LOOP (loop_vinfo); ! target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); ! } ! else ! target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info)); /* Cost of reduction op inside loop. */ unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt, stmt_info, 0, vect_body); stmt = STMT_VINFO_STMT (stmt_info); ! 
reduction_op = get_reduction_op (stmt, reduc_index); vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); if (!vectype) *************** vect_model_reduction_cost (stmt_vec_info *** 3245,3251 **** We have a reduction operator that will reduce the vector in one statement. Also requires scalar extract. */ ! if (!nested_in_vect_loop_p (loop, orig_stmt)) { if (reduc_code != ERROR_MARK) { --- 3259,3265 ---- We have a reduction operator that will reduce the vector in one statement. Also requires scalar extract. */ ! if (!loop || !nested_in_vect_loop_p (loop, orig_stmt)) { if (reduc_code != ERROR_MARK) { *************** vect_create_epilog_for_reduction (vec<tr *** 3992,4017 **** gcc_assert (!slp_node); } ! switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) ! { ! case GIMPLE_SINGLE_RHS: ! gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) ! == ternary_op); ! reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index); ! break; ! case GIMPLE_UNARY_RHS: ! reduction_op = gimple_assign_rhs1 (stmt); ! break; ! case GIMPLE_BINARY_RHS: ! reduction_op = reduc_index ? ! gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt); ! break; ! case GIMPLE_TERNARY_RHS: ! reduction_op = gimple_op (stmt, reduc_index + 1); ! break; ! default: ! gcc_unreachable (); ! } vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); gcc_assert (vectype); --- 4006,4012 ---- gcc_assert (!slp_node); } ! reduction_op = get_reduction_op (stmt, reduc_index); vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); gcc_assert (vectype); *************** vectorizable_reduction (gimple stmt, gim *** 4845,4852 **** tree ops[3]; bool nested_cycle = false, found_nested_cycle_def = false; gimple reduc_def_stmt = NULL; - /* The default is that the reduction variable is the last in statement. 
*/ - int reduc_index = 2; bool double_reduc = false, dummy; basic_block def_bb; struct loop * def_stmt_loop, *outer_loop = NULL; --- 4840,4845 ---- *************** vectorizable_reduction (gimple stmt, gim *** 4951,4956 **** --- 4944,4951 ---- default: gcc_unreachable (); } + /* The default is that the reduction variable is the last in statement. */ + int reduc_index = op_type - 1; if (code == COND_EXPR && slp_node) return false; *************** vectorizable_reduction (gimple stmt, gim *** 5248,5254 **** if (!vec_stmt) /* transformation not required. */ { ! if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies)) return false; STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; return true; --- 5243,5250 ---- if (!vec_stmt) /* transformation not required. */ { ! if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies, ! reduc_index)) return false; STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; return true; Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c (revision 223482) --- gcc/tree-vect-slp.c (working copy) *************** vect_get_constant_vectors (tree op, slp_ *** 2664,2674 **** struct loop *loop; gimple_seq ctor_seq = NULL; if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def && reduc_index != -1) { ! op_num = reduc_index - 1; ! op = gimple_op (stmt, reduc_index); /* For additional copies (see the explanation of NUMBER_OF_COPIES below) we need either neutral operands or the original operands. See get_initial_def_for_reduction() for details. */ --- 2664,2677 ---- struct loop *loop; gimple_seq ctor_seq = NULL; + vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); + nunits = TYPE_VECTOR_SUBPARTS (vector_type); + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def && reduc_index != -1) { ! op_num = reduc_index; ! 
op = gimple_op (stmt, op_num + 1); /* For additional copies (see the explanation of NUMBER_OF_COPIES below) we need either neutral operands or the original operands. See get_initial_def_for_reduction() for details. */ *************** vect_get_constant_vectors (tree op, slp_ *** 2676,2681 **** --- 2679,2685 ---- { case WIDEN_SUM_EXPR: case DOT_PROD_EXPR: + case SAD_EXPR: case PLUS_EXPR: case MINUS_EXPR: case BIT_IOR_EXPR: *************** vect_get_constant_vectors (tree op, slp_ *** 2716,2721 **** --- 2720,2726 ---- break; default: + gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo)); neutral_op = NULL; } } *************** vect_get_constant_vectors (tree op, slp_ *** 2735,2744 **** else constant_p = false; - vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); - gcc_assert (vector_type); - nunits = TYPE_VECTOR_SUBPARTS (vector_type); - /* NUMBER_OF_COPIES is the number of times we need to use the same values in created vectors. It is greater than 1 if unrolling is performed. --- 2740,2745 ---- Index: gcc/tree-vect-stmts.c =================================================================== *** gcc/tree-vect-stmts.c (revision 223482) --- gcc/tree-vect-stmts.c (working copy) *************** vect_mark_stmts_to_be_vectorized (loop_v *** 812,819 **** for (; i < gimple_num_ops (stmt); i++) { op = gimple_op (stmt, i); ! if (!process_use (stmt, op, loop_vinfo, live_p, relevant, ! &worklist, false)) return false; } } --- 812,820 ---- for (; i < gimple_num_ops (stmt); i++) { op = gimple_op (stmt, i); ! if (TREE_CODE (op) == SSA_NAME ! && !process_use (stmt, op, loop_vinfo, live_p, relevant, ! &worklist, false)) return false; } }