On Thu, Jul 8, 2021 at 2:43 PM Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > vect_create_epilog_for_reduction had a variable called new_phis. > It collected the statements that produce the exit block definitions > of the vector reduction accumulators. Although those statements > are indeed phis initially, they are often replaced with normal > statements later, leading to puzzling code like: > > FOR_EACH_VEC_ELT (new_phis, i, new_phi) > { > int bit_offset; > if (gimple_code (new_phi) == GIMPLE_PHI) > vec_temp = PHI_RESULT (new_phi); > else > vec_temp = gimple_assign_lhs (new_phi); > > Also, although the array collects statements, in practice all users want > the lhs instead. > > This patch therefore replaces new_phis with a vector of gimple values > called “reduc_inputs”. > > Also, reduction chains and ncopies>1 were handled with identical code > (and there was a comment saying so). The patch unites them into > a single “if”.
OK. Thanks, Richard. > gcc/ > * tree-vect-loop.c (vect_create_epilog_for_reduction): Replace > the new_phis vector with a reduc_inputs vector. Combine handling > of reduction chains and ncopies > 1. > --- > gcc/tree-vect-loop.c | 113 ++++++++++++++++--------------------------- > 1 file changed, 41 insertions(+), 72 deletions(-) > > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index 8390ac80ca0..b7f73ca52c7 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -5005,7 +5005,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > imm_use_iterator imm_iter, phi_imm_iter; > use_operand_p use_p, phi_use_p; > gimple *use_stmt; > - auto_vec<gimple *> new_phis; > + auto_vec<tree> reduc_inputs; > int j, i; > auto_vec<tree> scalar_results; > unsigned int group_size = 1, k; > @@ -5017,7 +5017,6 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > b2 = operation (b1) */ > bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); > bool direct_slp_reduc; > - tree new_phi_result; > tree induction_index = NULL_TREE; > > if (slp_node) > @@ -5215,7 +5214,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > if (double_reduc) > loop = outer_loop; > exit_bb = single_exit (loop)->dest; > - new_phis.create (slp_node ? vec_num : ncopies); > + reduc_inputs.create (slp_node ? vec_num : ncopies); > for (unsigned i = 0; i < vec_num; i++) > { > if (slp_node) > @@ -5223,19 +5222,14 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > else > def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]); > for (j = 0; j < ncopies; j++) > - { > + { > tree new_def = copy_ssa_name (def); > - phi = create_phi_node (new_def, exit_bb); > - if (j == 0) > - new_phis.quick_push (phi); > - else > - { > - def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); > - new_phis.quick_push (phi); > - } > - > - SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); > - } > + phi = create_phi_node (new_def, exit_bb); > + if (j) > + def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); > + SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); > + reduc_inputs.quick_push (new_def); > + } > } > > exit_gsi = gsi_after_labels (exit_bb); > @@ -5274,52 +5268,32 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > a2 = operation (a1) > a3 = operation (a2), > > - we may end up with more than one vector result. Here we reduce them to > - one vector. */ > - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc) > + we may end up with more than one vector result. Here we reduce them > + to one vector. > + > + The same is true if we couldn't use a single defuse cycle. */ > + if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) > + || direct_slp_reduc > + || ncopies > 1) > { > gimple_seq stmts = NULL; > - tree first_vect = PHI_RESULT (new_phis[0]); > - first_vect = gimple_convert (&stmts, vectype, first_vect); > - for (k = 1; k < new_phis.length (); k++) > + tree first_vect = gimple_convert (&stmts, vectype, reduc_inputs[0]); > + for (k = 1; k < reduc_inputs.length (); k++) > { > - gimple *next_phi = new_phis[k]; > - tree second_vect = PHI_RESULT (next_phi); > - second_vect = gimple_convert (&stmts, vectype, second_vect); > + tree second_vect = gimple_convert (&stmts, vectype, > reduc_inputs[k]); > first_vect = gimple_build (&stmts, code, vectype, > first_vect, second_vect); > } > gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > - new_phi_result = first_vect; > - new_phis.truncate (0); > - new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); > + reduc_inputs.truncate (0); > + reduc_inputs.safe_push (first_vect); > } > - /* Likewise if we couldn't use a single defuse cycle. */ > - else if (ncopies > 1) > - { > - gimple_seq stmts = NULL; > - tree first_vect = PHI_RESULT (new_phis[0]); > - first_vect = gimple_convert (&stmts, vectype, first_vect); > - for (int k = 1; k < ncopies; ++k) > - { > - tree second_vect = PHI_RESULT (new_phis[k]); > - second_vect = gimple_convert (&stmts, vectype, second_vect); > - first_vect = gimple_build (&stmts, code, vectype, > - first_vect, second_vect); > - } > - gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > - new_phi_result = first_vect; > - new_phis.truncate (0); > - new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); > - } > - else > - new_phi_result = PHI_RESULT (new_phis[0]); > > if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION > && reduc_fn != IFN_LAST) > { > - /* For condition reductions, we have a vector (NEW_PHI_RESULT) > containing > + /* For condition reductions, we have a vector (REDUC_INPUTS 0) > containing > various data values where the condition matched and another vector > (INDUCTION_INDEX) containing all the indexes of those matches. We > need to extract the last matching index (which will be the index with > @@ -5350,7 +5324,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > tree zero_vec = build_zero_cst (vectype); > > gimple_seq stmts = NULL; > - new_phi_result = gimple_convert (&stmts, vectype, new_phi_result); > + reduc_inputs[0] = gimple_convert (&stmts, vectype, reduc_inputs[0]); > gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > /* Find maximum value from the vector of found indexes. */ > @@ -5370,7 +5344,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > > /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes > with the vector (INDUCTION_INDEX) of found indexes, choosing values > - from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC) > + from the data vector (REDUC_INPUTS 0) for matches, 0 (ZERO_VEC) > otherwise. Only one value should match, resulting in a vector > (VEC_COND) with one data value and the rest zeros. > In the case where the loop never made any matches, every index will > @@ -5389,7 +5363,8 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > zero. */ > tree vec_cond = make_ssa_name (vectype); > gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR, > - vec_compare, > new_phi_result, > + vec_compare, > + reduc_inputs[0], > zero_vec); > gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT); > > @@ -5437,7 +5412,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > val = data_reduc[i], idx_val = induction_index[i]; > return val; */ > > - tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result)); > + tree data_eltype = TREE_TYPE (TREE_TYPE (reduc_inputs[0])); > tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); > unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); > poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE > (induction_index)); > @@ -5461,7 +5436,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF, > build3 (BIT_FIELD_REF, > data_eltype, > - new_phi_result, > + reduc_inputs[0], > bitsize_int (el_size), > bitsize_int (off))); > gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); > @@ -5513,10 +5488,10 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > "Reduce using direct vector reduction.\n"); > > gimple_seq stmts = NULL; > - new_phi_result = gimple_convert (&stmts, vectype, new_phi_result); > - vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result)); > + reduc_inputs[0] = gimple_convert (&stmts, vectype, reduc_inputs[0]); > + vec_elem_type = TREE_TYPE (TREE_TYPE (reduc_inputs[0])); > new_temp = gimple_build (&stmts, as_combined_fn (reduc_fn), > - vec_elem_type, new_phi_result); > + vec_elem_type, reduc_inputs[0]); > new_temp = gimple_convert (&stmts, scalar_type, new_temp); > gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > @@ -5546,7 +5521,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > neutral value. We can then do a normal reduction on each vector. */ > > /* Enforced by vectorizable_reduction. */ > - gcc_assert (new_phis.length () == 1); > + gcc_assert (reduc_inputs.length () == 1); > gcc_assert (pow2p_hwi (group_size)); > > slp_tree orig_phis_slp_node = slp_node_instance->reduc_phis; > @@ -5602,7 +5577,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > > sel[j] = (index[j] == i); > > - which selects the elements of NEW_PHI_RESULT that should > + which selects the elements of REDUC_INPUTS[0] that should > be included in the result. */ > tree compare_val = build_int_cst (index_elt_type, i); > compare_val = build_vector_from_val (index_type, compare_val); > @@ -5611,11 +5586,11 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > > /* Calculate the equivalent of: > > - vec = seq ? new_phi_result : vector_identity; > + vec = seq ? reduc_inputs[0] : vector_identity; > > VEC is now suitable for a full vector reduction. */ > tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype, > - sel, new_phi_result, vector_identity); > + sel, reduc_inputs[0], vector_identity); > > /* Do the reduction and convert it to the appropriate type. */ > tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn), > @@ -5630,7 +5605,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > bool reduce_with_shift; > tree vec_temp; > > - gcc_assert (slp_reduc || new_phis.length () == 1); > + gcc_assert (slp_reduc || reduc_inputs.length () == 1); > > /* See if the target wants to do the final (shift) reduction > in a vector mode of smaller size and first reduce upper/lower > @@ -5640,7 +5615,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); > unsigned nunits1 = nunits; > if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode > - && new_phis.length () == 1) > + && reduc_inputs.length () == 1) > { > nunits1 = GET_MODE_NUNITS (mode1).to_constant (); > /* For SLP reductions we have to make sure lanes match up, but > @@ -5672,7 +5647,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > > /* First reduce the vector to the desired vector size we should > do shift reduction on by combining upper and lower halves. */ > - new_temp = new_phi_result; > + new_temp = reduc_inputs[0]; > while (nunits > nunits1) > { > nunits /= 2; > @@ -5751,7 +5726,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > new_temp = make_ssa_name (vectype1); > epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2); > gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); > - new_phis[0] = epilog_stmt; > + reduc_inputs[0] = new_temp; > } > > if (reduce_with_shift && !slp_reduc) > @@ -5832,13 +5807,9 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > int element_bitsize = tree_to_uhwi (bitsize); > tree compute_type = TREE_TYPE (vectype); > gimple_seq stmts = NULL; > - FOR_EACH_VEC_ELT (new_phis, i, new_phi) > + FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp) > { > int bit_offset; > - if (gimple_code (new_phi) == GIMPLE_PHI) > - vec_temp = PHI_RESULT (new_phi); > - else > - vec_temp = gimple_assign_lhs (new_phi); > new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type, > vec_temp, bitsize, bitsize_zero_node); > > @@ -5929,11 +5900,10 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > gimple_seq stmts = NULL; > if (double_reduc) > { > - new_phi = new_phis[0]; > gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def))); > adjustment_def = gimple_convert (&stmts, vectype, adjustment_def); > new_temp = gimple_build (&stmts, code, vectype, > - PHI_RESULT (new_phi), adjustment_def); > + reduc_inputs[0], adjustment_def); > } > else > { > @@ -5947,7 +5917,6 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > epilog_stmt = gimple_seq_last_stmt (stmts); > gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > scalar_results[0] = new_temp; > - new_phis[0] = epilog_stmt; > } > > if (double_reduc)