"Kewen.Lin" <li...@linux.ibm.com> writes: > diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c > index ca68d04a919..1fac5898525 100644 > --- a/gcc/tree-vect-loop-manip.c > +++ b/gcc/tree-vect-loop-manip.c > @@ -420,8 +420,8 @@ vect_set_loop_controls_directly (class loop *loop, > loop_vec_info loop_vinfo, > rgroup_controls *rgc, tree niters, > tree niters_skip, bool might_wrap_p) > { > - tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); > - tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo); > + tree compare_type = LOOP_VINFO_COMPARE_TYPE (loop_vinfo); > + tree iv_type = LOOP_VINFO_IV_TYPE (loop_vinfo);
How about s/MASK/RGROUP/ instead? COMPARE_TYPE and IV_TYPE sound a bit too generic, and might give the impression that they're meaningful for classic full-vector vectorisation too. > @@ -748,13 +748,12 @@ vect_set_loop_condition_masked (class loop *loop, > loop_vec_info loop_vinfo, > } > > /* Like vect_set_loop_condition, but handle the case in which there > - are no loop masks. */ > + are no partial vectorization loops. */ Maybe: … in which the vector loop handles exactly VF scalars per iteration. > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index 7ea75d6d095..b6e96f77f69 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -155,6 +155,7 @@ along with GCC; see the file COPYING3. If not see > static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); > static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, > bool *, bool *); > +static bool known_niters_smaller_than_vf (loop_vec_info); Please instead define the function before its first caller. Adding “vect_” to the beginning of the name would probably be more consistent. > [...] > @@ -959,14 +960,41 @@ vect_get_max_nscalars_per_iter (loop_vec_info > loop_vinfo) > return res; > } > > +/* Calculate the minimal bits necessary to represent the maximal iteration > + count of loop with loop_vec_info LOOP_VINFO which is scaling with a given > + factor FACTOR. */ How about: /* Calculate the minimum precision necessary to represent: MAX_NITERS * FACTOR as an unsigned integer, where MAX_NITERS is the maximum number of loop header iterations for the original scalar form of LOOP_VINFO. */ > + > +static unsigned > +min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor) Here too I think a “vect_” prefix would be more consistent. > +{ > + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > + > + /* Get the maximum number of iterations that is representable > + in the counter type. */ > + tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo)); > + widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1; > + > + /* Get a more refined estimate for the number of iterations. */ > + widest_int max_back_edges; > + if (max_loop_iterations (loop, &max_back_edges)) > + max_ni = wi::smin (max_ni, max_back_edges + 1); > + > + /* Account for factor, in which each bit is replicated N times. */ The “, in which each bit …” part no longer makes sense in this generic context. Probably best just to drop the comment altogether and… > + max_ni *= factor; > + > + /* Work out how many bits we need to represent the limit. */ > + unsigned int min_ni_width = wi::min_precision (max_ni, UNSIGNED); > + > + return min_ni_width; …change this to: /* Work out how many bits we need to represent the limit. */ return wi::min_precision (max_ni * factor, UNSIGNED); > [...] > @@ -6813,8 +6820,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "can't use a fully-masked loop because no" > - " conditional operation is available.\n"); > + "can't use a partial vectorization loop because" > + " no conditional operation is available.\n"); Maybe “can't operate on partial vectors because…”. Same for the later changes. > @@ -9194,12 +9202,13 @@ optimize_mask_stores (class loop *loop) > } > > /* Decide whether it is possible to use a zero-based induction variable > - when vectorizing LOOP_VINFO with a fully-masked loop. If it is, > - return the value that the induction variable must be able to hold > - in order to ensure that the loop ends with an all-false mask. > + when vectorizing LOOP_VINFO with a partial vectorization loop. If Maybe ”…with partial vectors” > + it is, return the value that the induction variable must be able to > + hold in order to ensure that the loop ends with an all-false rgroup > + control like mask. > Return -1 otherwise. */ This was originally meant to be a single paragraph, so I think it reads better if the ”Return -1 otherwise.” is on the previous line. > [...] > @@ -9234,3 +9243,23 @@ vect_iv_limit_for_full_masking (loop_vec_info > loop_vinfo) > return iv_limit; > } > > +/* If we know the iteration count is smaller than vectorization factor, > return > + true, otherwise return false. */ Maybe: /* Return true if we know that the iteration count is smaller than the vectorization factor. Return false if it isn't, or if we can't be sure either way. */ > @@ -534,9 +534,10 @@ public: > elements that should be false in the first mask). */ > tree mask_skip_niters; > > - /* Type of the variables to use in the WHILE_ULT call for fully-masked > + /* Type of the variables to use in the loop closing comparison for > + partial vectorization, like WHILE_ULT call for fully-masked > loops. */ > - tree mask_compare_type; > + tree compare_type; Maybe: /* The type that the loop control IV should be converted to before testing which of the VF scalars are active and inactive. Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ > @@ -545,8 +546,8 @@ public: > is false and vectorized loop otherwise. */ > tree simd_if_cond; > > - /* Type of the IV to use in the WHILE_ULT call for fully-masked > - loops. */ > + /* Type of the IV to use in the loop closing comparison for partial > + vectorization, like WHILE_ULT call for fully-masked loops. */ > tree iv_type; Maybe: /* The type that the vector loop control IV should have when LOOP_VINFO_USING_PARTIAL_VECTORS_P is true. */ Thanks, Richard