Richard Biener <rguent...@suse.de> writes: >> > [...] >> > @@ -2898,43 +2899,63 @@ vect_joust_loop_vinfos (loop_vec_info >> > new_loop_vinfo, >> > return true; >> > } >> > >> > -/* If LOOP_VINFO is already a main loop, return it unmodified. Otherwise >> > - try to reanalyze it as a main loop. Return the loop_vinfo on success >> > - and null on failure. */ >> > +/* Analyze LOOP with VECTOR_MODE and as epilogue if MAIN_LOOP_VINFO is >> > + not NULL. Process the analyzed loop with PROCESS even if analysis >> > + failed. Sets *N_STMTS and FATAL according to the analysis. >> > + Return the loop_vinfo on success and wrapped null on failure. */ >> > >> > -static loop_vec_info >> > -vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int >> > *n_stmts) >> > +static opt_loop_vec_info >> > +vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, >> > + machine_mode vector_mode, loop_vec_info main_loop_vinfo, >> > + unsigned int *n_stmts, bool &fatal, >> > + std::function<void(loop_vec_info)> process = nullptr) >> > { >> > - if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) >> > - return loop_vinfo; >> > + /* Check the CFG characteristics of the loop (nesting, entry/exit). */ >> > + opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared); >> > + if (!loop_vinfo) >> > + { >> > + if (dump_enabled_p ()) >> > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >> > + "bad loop form.\n"); >> > + gcc_checking_assert (main_loop_vinfo == NULL); >> > + return loop_vinfo; >> > + } >> > + loop_vinfo->vector_mode = vector_mode; >> > >> > - if (dump_enabled_p ()) >> > - dump_printf_loc (MSG_NOTE, vect_location, >> > - "***** Reanalyzing as a main loop with vector mode %s\n", >> > - GET_MODE_NAME (loop_vinfo->vector_mode)); >> > + if (main_loop_vinfo) >> > + LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_vinfo; >> > >> > - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); >> > - vec_info_shared *shared = loop_vinfo->shared; >> > - opt_loop_vec_info main_loop_vinfo = vect_analyze_loop_form (loop, >> > shared); >> > - gcc_assert (main_loop_vinfo); >> > + /* Run the main analysis. */ >> > + fatal = false; >> >> Think this should be at the top, since we have an early return above. >> The early return should be fatal. > > Indeed. I've split and split out vect_analyze_loop_form instead, the > failing part should only be required once for each loop.
Ah, yeah, agree that's nicer. > [...] > >> > + if (!vect_epilogues) >> >> !vect_epilogues is correct for current uses, but I think the original >> !LOOP_VINFO_EPILOGUE_P (loop_vinfo) was more general. As mentioned above, >> in principle there's no reason why we couldn't reanalyse a loop as a >> main loop if we fail to analyse it as an epilogue. > > OK, restored that. > > The following is mainly the original reorg with the additional > refactoring of vect_analyze_loop_form. I think I'll put this in > before rewriting the main iteration to first only analyze main > loops (and then possibly unrolled main loops) and only after > settling for the cheapest main loop consider epilogue > vectorization. > > As you said the original approach of saving extra analyses by > using epilogue analysis as main loop analysis became moot and > with partial vectors the re-analysis as epilogue wasn't > re-usable anyway. What we could eventually remember is > modes that fail vectorization, those will likely not succeed > when analyzed in epilogue context either. > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > Since this is refactoring that should not change behavior > but re-organizing the analysis loop might I'd like to put > this onto trunk as intermediate step. Is that OK? Yeah, looks good to me FWIW. Just a couple of small comments: > @@ -3023,43 +3023,36 @@ vect_analyze_loop (class loop *loop, vec_info_shared > *shared) > LOOP_VINFO fails when treated as an epilogue loop, succeeds when > treated as a standalone loop, and ends up being genuinely cheaper > than FIRST_LOOP_VINFO. */ > - if (vect_epilogues) > - LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo; > > - res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts); > - if (mode_i == 0) > - autodetected_vector_mode = loop_vinfo->vector_mode; > - if (dump_enabled_p ()) > + bool fatal; > + auto cb = [&] (loop_vec_info loop_vinfo) > { > - if (res) > - dump_printf_loc (MSG_NOTE, vect_location, > - "***** Analysis succeeded with vector mode %s\n", > - GET_MODE_NAME (loop_vinfo->vector_mode)); > - else > - dump_printf_loc (MSG_NOTE, vect_location, > - "***** Analysis failed with vector mode %s\n", > - GET_MODE_NAME (loop_vinfo->vector_mode)); > - } > - > - loop->aux = NULL; > - > - if (!fatal) > - while (mode_i < vector_modes.length () > - && vect_chooses_same_modes_p (loop_vinfo, vector_modes[mode_i])) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_NOTE, vect_location, > - "***** The result for vector mode %s would" > - " be the same\n", > - GET_MODE_NAME (vector_modes[mode_i])); > - mode_i += 1; > - } > + if (mode_i ==0) s/==0/== 0/ > + autodetected_vector_mode = loop_vinfo->vector_mode; > + if (!fatal) > + while (mode_i < vector_modes.length () > + && vect_chooses_same_modes_p (loop_vinfo, > + vector_modes[mode_i])) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "***** The result for vector mode %s would" > + " be the same\n", > + GET_MODE_NAME (vector_modes[mode_i])); > + mode_i += 1; > + } I guess the autodetected_vector_mode part is redundant when fatal, so perhaps we could avoid calling the callback for fatal failures and remove the !fatal above. Just a suggestion though, either way's fine with me. Thanks, Richard > + }; > + opt_loop_vec_info loop_vinfo > + = vect_analyze_loop_1 (loop, shared, &loop_form_info, > + next_vector_mode, > + vect_epilogues > + ? (loop_vec_info)first_loop_vinfo : NULL, > + &n_stmts, fatal, cb); > + if (fatal) > + break; > > - if (res) > + if (loop_vinfo) > { > - LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; > - vectorized_loops++; > - > /* Once we hit the desired simdlen for the first time, > discard any previous attempts. */ > if (simdlen > @@ -3084,33 +3077,44 @@ vect_analyze_loop (class loop *loop, vec_info_shared > *shared) > if (vinfos.is_empty () > && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo)) > { > - loop_vec_info main_loop_vinfo > - = vect_reanalyze_as_main_loop (loop_vinfo, &n_stmts); > - if (main_loop_vinfo == loop_vinfo) > + if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) > { > delete first_loop_vinfo; > first_loop_vinfo = opt_loop_vec_info::success (NULL); > } > - else if (main_loop_vinfo > - && vect_joust_loop_vinfos (main_loop_vinfo, > - first_loop_vinfo)) > - { > - delete first_loop_vinfo; > - first_loop_vinfo = opt_loop_vec_info::success (NULL); > - delete loop_vinfo; > - loop_vinfo > - = opt_loop_vec_info::success (main_loop_vinfo); > - } > else > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > - "***** No longer preferring vector" > - " mode %s after reanalyzing the loop" > - " as a main loop\n", > + "***** Reanalyzing as a main loop " > + "with vector mode %s\n", > GET_MODE_NAME > - (main_loop_vinfo->vector_mode)); > - delete main_loop_vinfo; > + (loop_vinfo->vector_mode)); > + opt_loop_vec_info main_loop_vinfo > + = vect_analyze_loop_1 (loop, shared, &loop_form_info, > + loop_vinfo->vector_mode, > + NULL, &n_stmts, fatal); > + if (main_loop_vinfo > + && vect_joust_loop_vinfos (main_loop_vinfo, > + first_loop_vinfo)) > + { > + delete first_loop_vinfo; > + first_loop_vinfo = opt_loop_vec_info::success (NULL); > + delete loop_vinfo; > + loop_vinfo > + = opt_loop_vec_info::success (main_loop_vinfo); > + } > + else > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, > + "***** No longer preferring vector" > + " mode %s after reanalyzing the " > + " loop as a main loop\n", > + GET_MODE_NAME > + (loop_vinfo->vector_mode)); > + delete main_loop_vinfo; > + } > } > } > } > @@ -3159,16 +3163,6 @@ vect_analyze_loop (class loop *loop, vec_info_shared > *shared) > if (!simdlen && !vect_epilogues && !pick_lowest_cost_p) > break; > } > - else > - { > - delete loop_vinfo; > - loop_vinfo = opt_loop_vec_info::success (NULL); > - if (fatal) > - { > - gcc_checking_assert (first_loop_vinfo == NULL); > - break; > - } > - } > > /* Handle the case that the original loop can use partial > vectorization, but want to only adopt it for the epilogue. > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 73347ce1f4e..20eda72f829 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2061,8 +2061,17 @@ extern bool reduction_fn_for_scalar_code (enum > tree_code, internal_fn *); > > /* Drive for loop transformation stage. */ > extern class loop *vect_transform_loop (loop_vec_info, gimple *); > -extern opt_loop_vec_info vect_analyze_loop_form (class loop *, > - vec_info_shared *); > +struct vect_loop_form_info > +{ > + tree number_of_iterations; > + tree number_of_iterationsm1; > + tree assumptions; > + gcond *loop_cond; > + gcond *inner_loop_cond; > +}; > +extern opt_result vect_analyze_loop_form (class loop *, vect_loop_form_info > *); > +extern loop_vec_info vect_create_loop_vinfo (class loop *, vec_info_shared *, > + const vect_loop_form_info *); > extern bool vectorizable_live_operation (vec_info *, > stmt_vec_info, gimple_stmt_iterator *, > slp_tree, slp_instance, int,