Hi all, Just looking for some feedback on the approach.
Currently the loop vectorizer can't vectorize the following typical loop for getting max value and index from an array: void test_vec(int *data, int n) { int best_i, best = 0; for (int i = 0; i < n; i++) { if (data[i] > best) { best = data[i]; best_i = i; } } data[best_i] = data[0]; data[0] = best; } This patch adds some support for this pattern. This patch addresses Bug 88259. Regression testing is still in progress, This patch does not work correctly with vect-epilogues-nomask, and the reason for that is still being investigated. gcc/ChangeLog: 2019-11-15 Joel Hutton <joel.hut...@arm.com> Tamar Christina <tamar.christ...@arm.com> PR tree-optimization/88259 * tree-vect-loop.c (vect_reassociating_reduction_simple_p): New function. (vect_recog_minmax_index_pattern): New function. (vect_is_simple_reduction): Add check for minmax pattern. (vect_model_reduction_cost): Add case for minmax pattern. (vect_create_epilog_for_reduction): Add fixup for minmax epilog. * tree-vectorizer.h (enum vect_reduction_type): Add INDEX_MINMAX_REDUCTION reduction type.
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index b600d3157457c3180d0456c4f66cbc57012e3c71..dc97dea38a504e8f9391e6d138aad0a2e3872b50 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -387,6 +387,83 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) return opt_result::success (); } +static bool +vect_reassociating_reduction_simple_p (stmt_vec_info stmt_info, tree_code code, + tree *op0_out, tree *op1_out) +{ + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); + if (!loop_info) + return false; + + gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); + if (!assign || gimple_assign_rhs_code (assign) != code) + return false; + + /* We don't allow changing the order of the computation in the inner-loop + when doing outer-loop vectorization. */ + class loop *loop = LOOP_VINFO_LOOP (loop_info); + if (loop && nested_in_vect_loop_p (loop, stmt_info)) + return false; + + *op0_out = gimple_assign_rhs1 (assign); + *op1_out = gimple_assign_rhs2 (assign); + return true; +} + + +static bool +vect_recog_minmax_index_pattern (stmt_vec_info stmt_vinfo, loop_vec_info loop_info) +{ + tree oprnd0, oprnd1; + gimple *last_stmt = stmt_vinfo->stmt; + vec_info *vinfo = stmt_vinfo->vinfo; + gimple *use_stmt; + use_operand_p use_p; + imm_use_iterator iter; + + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + + if (!vect_reassociating_reduction_simple_p (stmt_vinfo, MAX_EXPR, + &oprnd0, &oprnd1)) + return NULL; + + if (!is_a <gphi *> (SSA_NAME_DEF_STMT (oprnd1))) + return NULL; + + stmt_vec_info phy_vinfo = vinfo->lookup_def (oprnd1); + if (!phy_vinfo) + return NULL; + + basic_block top_bb = gimple_bb (last_stmt); + + FOR_EACH_IMM_USE_FAST (use_p, iter, oprnd1) + { + use_stmt = USE_STMT (use_p); + if (is_gimple_assign (use_stmt) + && gimple_assign_rhs_code (use_stmt) == COND_EXPR) + { + basic_block bb = gimple_bb (use_stmt); + + if (bb == top_bb + && gimple_uid (use_stmt) < gimple_uid (last_stmt)) + { + tree cond = gimple_assign_rhs1 (use_stmt); + if (TREE_CODE (cond) != LE_EXPR) + continue; + + stmt_vec_info ind_stmt = loop_info->lookup_stmt (use_stmt); + ind_stmt->reduc_related_stmt = stmt_vinfo; + stmt_vinfo->reduc_related_stmt = ind_stmt; + return true; + } + } + } + + return false; +} + + /* Function vect_is_simple_iv_evolution. @@ -2771,20 +2848,6 @@ pop: fail = true; break; } - /* Check there's only a single stmt the op is used on inside - of the loop. */ - imm_use_iterator imm_iter; - gimple *op_use_stmt; - unsigned cnt = 0; - FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op) - if (!is_gimple_debug (op_use_stmt) - && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))) - cnt++; - if (cnt != 1) - { - fail = true; - break; - } tree_code use_code = gimple_assign_rhs_code (use_stmt); if (use_code == MINUS_EXPR) { @@ -2963,10 +3026,22 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, return def_stmt_info; } + /* Detect if this is a reduction that we have special code to handle the use + of the reduction multiple times. */ + bool supported_multi_use_reduction + = vect_recog_minmax_index_pattern (def_stmt_info, loop_info); + + /* Update the reduction type for multi use reduction. */ + if (supported_multi_use_reduction) + STMT_VINFO_REDUC_TYPE (phi_info) = INDEX_MINMAX_REDUCTION; + + def_stmt_info->reduc_phi_node = phi_info; + /* If this isn't a nested cycle or if the nested cycle reduction value is used ouside of the inner loop we cannot handle uses of the reduction value. */ - if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1) + if (!supported_multi_use_reduction + && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3737,7 +3812,8 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, { if (reduc_fn != IFN_LAST) { - if (reduction_type == COND_REDUCTION) + if (reduction_type == COND_REDUCTION + || reduction_type == INDEX_MINMAX_REDUCTION) { /* An EQ stmt and an COND_EXPR stmt. */ epilogue_cost += record_stmt_cost (cost_vec, 2, @@ -4764,6 +4840,14 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); gimple_set_lhs (epilog_stmt, new_temp); + + if (stmt_info->reduc_related_stmt + && stmt_info->reduc_related_stmt->reduc_phi_node + && stmt_info->reduc_related_stmt->reduc_phi_node->reduc_type == INDEX_MINMAX_REDUCTION) + { + stmt_info->reduc_minmax_epilog_stmt = epilog_stmt; + } + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) @@ -5008,6 +5092,7 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, gimple_seq stmts = NULL; new_temp = gimple_convert (&stmts, vectype1, new_temp); + tree maxresultvec = new_temp; for (elt_offset = nelements / 2; elt_offset >= 1; elt_offset /= 2) @@ -5034,7 +5119,54 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info, epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); gimple_assign_set_lhs (epilog_stmt, new_temp); + + stmt_info->reduc_minmax_scalar_result = epilog_stmt; gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + + /* Fixup the minmax epilog. */ + if (stmt_info->reduc_related_stmt + && stmt_info->reduc_related_stmt->reduc_minmax_epilog_stmt) + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->reduc_related_stmt->reduc_minmax_epilog_stmt); + gimple *pre_stmt = NULL; + gimple *mm_i_ep_stmt = stmt_info->reduc_related_stmt->reduc_minmax_epilog_stmt; + tree mm_i_lhs = gimple_get_lhs (mm_i_ep_stmt); + tree mm_i_rhs = gimple_call_arg (mm_i_ep_stmt, 0); + tree vectype = TREE_TYPE (mm_i_rhs); + /* Scalar result of min/max in minmax reduction. */ + tree maxval = gimple_get_lhs (stmt_info->reduc_minmax_scalar_result); + tree maxvec = build_vector_from_val (vectype, maxval); + gimple* maxvec_decl = gimple_build_assign (make_ssa_name (vectype), + maxvec); + gsi_insert_before (&gsi, maxvec_decl, GSI_SAME_STMT); + tree maxvec_lhs = gimple_get_lhs (maxvec_decl); + enum tree_code cond_code = EQ_EXPR; + tree vec_comp_type = build_same_sized_truth_vector_type (vectype); + /* Produce a mask that gives the position of max element in max + vector. */ + tree mask = build2 (cond_code, vec_comp_type, maxvec_lhs, + maxresultvec); + tree _mask_ssa_name = make_ssa_name (vec_comp_type); + gimple* _mask_decl = gimple_build_assign (_mask_ssa_name, + mask); + gsi_insert_before (&gsi, _mask_decl, GSI_SAME_STMT); + tree mask_ssa_name = make_ssa_name (vectype); + gimple* mask_decl = gimple_build_assign (mask_ssa_name, CONVERT_EXPR, _mask_ssa_name); + gsi_insert_before (&gsi, mask_decl, GSI_SAME_STMT); + /* Apply the mask to the index vector to select index value in + corresponding position. */ + tree new_ssa = make_ssa_name (vectype); + pre_stmt = gimple_build_assign (new_ssa, BIT_AND_EXPR, mask_ssa_name, mm_i_rhs); + gsi_insert_before (&gsi, pre_stmt, GSI_SAME_STMT); + /* Reduce the index vector to a scalar result. */ + gimple* new_stmt = gimple_build_call_internal (IFN_REDUC_MAX, + 1, + gimple_get_lhs (pre_stmt)); + gimple_set_lhs (new_stmt, mm_i_lhs); + gsi = gsi_for_stmt (mm_i_ep_stmt); + gsi_replace (&gsi, new_stmt, 1); + } + scalar_results.safe_push (new_temp); } else diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index e9575a184ad02787cbdc6ea9059ef1dc35fbca94..8ff6266ecf08213a0f19191c4091bf10b4d6ae37 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -85,7 +85,17 @@ enum vect_reduction_type { res = res OP val[i]; (with no reassocation). */ - FOLD_LEFT_REDUCTION + FOLD_LEFT_REDUCTION, + + /* Reduction of an if selecting both the maximum/minimum element and the + corresponding index. + + for (int i = 0; i < n; i++) { + if (data[i] > best) { + best = data[i]; + best_i = i; + } */ + INDEX_MINMAX_REDUCTION }; #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \ @@ -967,6 +977,15 @@ public: pattern). */ stmt_vec_info related_stmt; + + stmt_vec_info reduc_related_stmt; + + gimple* reduc_minmax_epilog_stmt; + + gimple* reduc_minmax_scalar_result; + + stmt_vec_info reduc_phi_node; + /* Used to keep a sequence of def stmts of a pattern stmt if such exists. The sequence is attached to the original statement rather than the pattern statement. */