On Mon, Nov 4, 2019 at 4:26 PM Richard Sandiford <richard.sandif...@arm.com> wrote:
>
> Once vect_analyze_loop has found a valid loop_vec_info X, we carry
> on searching for alternatives if (1) X doesn't satisfy simdlen or
> (2) we want to vectorise the epilogue of X. I have a patch that
> optionally adds a third reason: we want to see if there are cheaper
> alternatives to X.
>
> This patch restructures vect_analyze_loop so that it's easier
> to add more reasons for continuing. There's supposed to be no
> behavioural change.
>
> If we wanted to, we could allow vectorisation of epilogues once
> loop->simdlen has been reached by changing "loop->simdlen" to
> "simdlen" in the new vect_epilogues condition. That should be
> a separate change though.
>
> This may conflict with Andre's fix for libgomp; I'll adjust if
> that goes in first.
OK.
>
> 2019-11-04 Richard Sandiford <richard.sandif...@arm.com>
>
> gcc/
> * tree-vect-loop.c (vect_analyze_loop): Break out of the main
> loop when we've finished, rather than returning directly from
> the loop. Use a local variable to track whether we're still
> searching for the preferred simdlen. Make vect_epilogues
> record whether the next iteration should try to treat the
> loop as an epilogue.
>
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c 2019-10-31 17:15:24.838521761 +0000
> +++ gcc/tree-vect-loop.c 2019-11-04 15:17:35.924940111 +0000
> @@ -2383,16 +2383,13 @@ vect_analyze_loop (class loop *loop, loo
> poly_uint64 lowest_th = 0;
> unsigned vectorized_loops = 0;
>
> - /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is enabled, this
> - is not a simd loop and it is the most inner loop. */
> - bool vect_epilogues
> - = !loop->simdlen && loop->inner == NULL
> - && PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK);
> + bool vect_epilogues = false;
> + opt_result res = opt_result::success ();
> + unsigned HOST_WIDE_INT simdlen = loop->simdlen;
> while (1)
> {
> /* Check the CFG characteristics of the loop (nesting, entry/exit).
*/ > - opt_loop_vec_info loop_vinfo > - = vect_analyze_loop_form (loop, shared); > + opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared); > if (!loop_vinfo) > { > if (dump_enabled_p ()) > @@ -2407,67 +2404,70 @@ vect_analyze_loop (class loop *loop, loo > > if (orig_loop_vinfo) > LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo; > - else if (vect_epilogues && first_loop_vinfo) > + else if (vect_epilogues) > LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo; > > - opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts); > + res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts); > if (next_size == 0) > autodetected_vector_size = loop_vinfo->vector_size; > > + loop->aux = NULL; > if (res) > { > LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; > vectorized_loops++; > > - if ((loop->simdlen > - && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo), > - (unsigned HOST_WIDE_INT) loop->simdlen)) > - || vect_epilogues) > + /* Once we hit the desired simdlen for the first time, > + discard any previous attempts. */ > + if (simdlen > + && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen)) > { > - if (first_loop_vinfo == NULL) > - { > - first_loop_vinfo = loop_vinfo; > - lowest_th > - = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo); > - loop->aux = NULL; > - } > - else > - { > - /* Keep track of vector sizes that we know we can vectorize > - the epilogue with. Only vectorize first epilogue. */ > - if (vect_epilogues > - && first_loop_vinfo->epilogue_vinfos.is_empty ()) > - { > - loop->aux = NULL; > - first_loop_vinfo->epilogue_vinfos.reserve (1); > - first_loop_vinfo->epilogue_vinfos.quick_push > (loop_vinfo); > - LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = > first_loop_vinfo; > - poly_uint64 th > - = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); > - gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo) > - || maybe_ne (lowest_th, 0U)); > - /* Keep track of the known smallest versioning > - threshold. 
*/ > - if (ordered_p (lowest_th, th)) > - lowest_th = ordered_min (lowest_th, th); > - } > - else > - delete loop_vinfo; > - } > + delete first_loop_vinfo; > + first_loop_vinfo = opt_loop_vec_info::success (NULL); > + LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL; > + simdlen = 0; > } > - else > + > + if (first_loop_vinfo == NULL) > { > - delete first_loop_vinfo; > - return loop_vinfo; > + first_loop_vinfo = loop_vinfo; > + lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo); > } > + else if (vect_epilogues) > + { > + first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo); > + poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); > + gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo) > + || maybe_ne (lowest_th, 0U)); > + /* Keep track of the known smallest versioning > + threshold. */ > + if (ordered_p (lowest_th, th)) > + lowest_th = ordered_min (lowest_th, th); > + } > + else > + delete loop_vinfo; > + > + /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is > + enabled, this is not a simd loop and it is the innermost loop. > */ > + vect_epilogues = (!loop->simdlen > + && loop->inner == NULL > + && PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK) > + /* For now only allow one epilogue loop. */ > + && first_loop_vinfo->epilogue_vinfos.is_empty ()); > + > + /* Commit to first_loop_vinfo if we have no reason to try > + alternatives. 
*/ > + if (!simdlen && !vect_epilogues) > + break; > } > else > - delete loop_vinfo; > - > - if (fatal) > { > - gcc_checking_assert (first_loop_vinfo == NULL); > - return opt_loop_vec_info::propagate_failure (res); > + delete loop_vinfo; > + if (fatal) > + { > + gcc_checking_assert (first_loop_vinfo == NULL); > + break; > + } > } > > if (next_size < vector_sizes.length () > @@ -2476,24 +2476,7 @@ vect_analyze_loop (class loop *loop, loo > > if (next_size == vector_sizes.length () > || known_eq (autodetected_vector_size, 0U)) > - { > - if (first_loop_vinfo) > - { > - loop->aux = (loop_vec_info) first_loop_vinfo; > - if (dump_enabled_p ()) > - { > - dump_printf_loc (MSG_NOTE, vect_location, > - "***** Choosing vector size "); > - dump_dec (MSG_NOTE, first_loop_vinfo->vector_size); > - dump_printf (MSG_NOTE, "\n"); > - } > - LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th; > - > - return first_loop_vinfo; > - } > - else > - return opt_loop_vec_info::propagate_failure (res); > - } > + break; > > /* Try the next biggest vector size. */ > next_vector_size = vector_sizes[next_size++]; > @@ -2506,6 +2489,22 @@ vect_analyze_loop (class loop *loop, loo > dump_printf (MSG_NOTE, "\n"); > } > } > + > + if (first_loop_vinfo) > + { > + loop->aux = (loop_vec_info) first_loop_vinfo; > + if (dump_enabled_p ()) > + { > + dump_printf_loc (MSG_NOTE, vect_location, > + "***** Choosing vector size "); > + dump_dec (MSG_NOTE, first_loop_vinfo->vector_size); > + dump_printf (MSG_NOTE, "\n"); > + } > + LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th; > + return first_loop_vinfo; > + } > + > + return opt_loop_vec_info::propagate_failure (res); > } > > /* Return true if there is an in-order reduction function for CODE, storing