This continues the series of patches cleaning up versioning / peeling in the vectorizer. The patch moves the computation of whether to do a cost model check to a central place, where it is also easy to see where the check will eventually be generated. Apart from this reorganization, the patch recognizes that a runtime check is pointless if the check could have been done fully at compile time.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2012-05-09 Richard Guenther <rguent...@suse.de> * tree-vectorizer.h (vect_loop_versioning): Adjust prototype. (vect_do_peeling_for_loop_bound): Likewise. (vect_do_peeling_for_alignment): Likewise. * tree-vect-loop-manip.c (conservative_cost_threshold): Remove. (vect_do_peeling_for_loop_bound): Get check_profitability and threshold as parameters. (vect_do_peeling_for_alignment): Likewise. (vect_loop_versioning): Likewise. * tree-vect-loop.c (vect_transform_loop): Compute check_profitability and threshold here. Control where to put the check here. Index: gcc/tree-vectorizer.h =================================================================== *** gcc/tree-vectorizer.h (revision 187316) --- gcc/tree-vectorizer.h (working copy) *************** extern LOC vect_loop_location; *** 807,816 **** in tree-vect-loop-manip.c. */ extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree); extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); ! extern void vect_loop_versioning (loop_vec_info); extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *, ! tree, gimple_seq); ! extern void vect_do_peeling_for_alignment (loop_vec_info); extern LOC find_loop_location (struct loop *); extern bool vect_can_advance_ivs_p (loop_vec_info); --- 807,816 ---- in tree-vect-loop-manip.c. */ extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree); extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); ! extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *, ! unsigned int, bool); ! 
extern void vect_do_peeling_for_alignment (loop_vec_info, unsigned int, bool); extern LOC find_loop_location (struct loop *); extern bool vect_can_advance_ivs_p (loop_vec_info); Index: gcc/tree-vect-loop-manip.c =================================================================== *** gcc/tree-vect-loop-manip.c (revision 187316) --- gcc/tree-vect-loop-manip.c (working copy) *************** vect_update_ivs_after_vectorizer (loop_v *** 1853,1886 **** } } - /* Return the more conservative threshold between the - min_profitable_iters returned by the cost model and the user - specified threshold, if provided. */ - - static unsigned int - conservative_cost_threshold (loop_vec_info loop_vinfo, - int min_profitable_iters) - { - unsigned int th; - int min_scalar_loop_bound; - - min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) - * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1); - - /* Use the cost model only if it is more conservative than user specified - threshold. */ - th = (unsigned) min_scalar_loop_bound; - if (min_profitable_iters - && (!min_scalar_loop_bound - || min_profitable_iters > min_scalar_loop_bound)) - th = (unsigned) min_profitable_iters; - - if (th && vect_print_dump_info (REPORT_COST)) - fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th); - - return th; - } - /* Function vect_do_peeling_for_loop_bound Peel the last iterations of the loop represented by LOOP_VINFO. --- 1853,1858 ---- *************** conservative_cost_threshold (loop_vec_in *** 1896,1902 **** void vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, ! tree cond_expr, gimple_seq cond_expr_stmt_list) { tree ni_name, ratio_mult_vf_name; struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); --- 1868,1874 ---- void vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, ! 
unsigned int th, bool check_profitability) { tree ni_name, ratio_mult_vf_name; struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); *************** vect_do_peeling_for_loop_bound (loop_vec *** 1904,1913 **** edge update_e; basic_block preheader; int loop_num; - bool check_profitability = false; - unsigned int th = 0; - int min_profitable_iters; int max_iter; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ==="); --- 1876,1884 ---- edge update_e; basic_block preheader; int loop_num; int max_iter; + tree cond_expr = NULL_TREE; + gimple_seq cond_expr_stmt_list = NULL; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ==="); *************** vect_do_peeling_for_loop_bound (loop_vec *** 1925,1946 **** loop_num = loop->num; - /* If cost model check not done during versioning and - peeling for alignment. */ - if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) - && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) - && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) - && !cond_expr) - { - check_profitability = true; - - /* Get profitability threshold for vectorized loop. */ - min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); - - th = conservative_cost_threshold (loop_vinfo, - min_profitable_iters); - } - new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop), &ratio_mult_vf_name, ni_name, false, th, check_profitability, --- 1896,1901 ---- *************** vect_do_peeling_for_loop_bound (loop_vec *** 1967,1973 **** by ratio_mult_vf_name steps. */ vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e); ! max_iter = MAX (LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1, (int) th); record_niter_bound (new_loop, shwi_to_double_int (max_iter), false, true); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Setting upper bound of nb iterations for epilogue " --- 1922,1930 ---- by ratio_mult_vf_name steps. 
*/ vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e); ! max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1; ! if (check_profitability) ! max_iter = MAX (max_iter, (int) th); record_niter_bound (new_loop, shwi_to_double_int (max_iter), false, true); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Setting upper bound of nb iterations for epilogue " *************** vect_update_inits_of_drs (loop_vec_info *** 2158,2172 **** peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */ void ! vect_do_peeling_for_alignment (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree niters_of_prolog_loop, ni_name; tree n_iters; tree wide_prolog_niters; struct loop *new_loop; - unsigned int th = 0; - int min_profitable_iters; int max_iter; if (vect_print_dump_info (REPORT_DETAILS)) --- 2115,2128 ---- peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */ void ! vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, ! unsigned int th, bool check_profitability) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree niters_of_prolog_loop, ni_name; tree n_iters; tree wide_prolog_niters; struct loop *new_loop; int max_iter; if (vect_print_dump_info (REPORT_DETAILS)) *************** vect_do_peeling_for_alignment (loop_vec_ *** 2178,2199 **** niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name); - /* Get profitability threshold for vectorized loop. */ - min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); - th = conservative_cost_threshold (loop_vinfo, - min_profitable_iters); - /* Peel the prolog loop and iterate it niters_of_prolog_loop. */ new_loop = slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop), &niters_of_prolog_loop, ni_name, true, ! th, true, NULL_TREE, NULL); gcc_assert (new_loop); #ifdef ENABLE_CHECKING slpeel_verify_cfg_after_peeling (new_loop, loop); #endif ! 
max_iter = MAX (LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1, (int) th); record_niter_bound (new_loop, shwi_to_double_int (max_iter), false, true); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Setting upper bound of nb iterations for prologue " --- 2134,2152 ---- niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name); /* Peel the prolog loop and iterate it niters_of_prolog_loop. */ new_loop = slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop), &niters_of_prolog_loop, ni_name, true, ! th, check_profitability, NULL_TREE, NULL); gcc_assert (new_loop); #ifdef ENABLE_CHECKING slpeel_verify_cfg_after_peeling (new_loop, loop); #endif ! max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1; ! if (check_profitability) ! max_iter = MAX (max_iter, (int) th); record_niter_bound (new_loop, shwi_to_double_int (max_iter), false, true); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Setting upper bound of nb iterations for prologue " *************** vect_create_cond_for_alias_checks (loop_ *** 2547,2553 **** *COND_EXPR_STMT_LIST. */ void ! vect_loop_versioning (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block condition_bb; --- 2500,2507 ---- *COND_EXPR_STMT_LIST. */ void ! vect_loop_versioning (loop_vec_info loop_vinfo, ! unsigned int th, bool check_profitability) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block condition_bb; *************** vect_loop_versioning (loop_vec_info loop *** 2556,2580 **** basic_block new_exit_bb; edge new_exit_e, e; gimple orig_phi, new_phi; ! tree cond_expr; gimple_seq cond_expr_stmt_list = NULL; tree arg; unsigned prob = 4 * REG_BR_PROB_BASE / 5; gimple_seq gimplify_stmt_list = NULL; tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); - int min_profitable_iters = 0; - unsigned int th; ! /* Get profitability threshold for vectorized loop. */ ! min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); ! ! 
th = conservative_cost_threshold (loop_vinfo, ! min_profitable_iters); ! ! cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters, ! build_int_cst (TREE_TYPE (scalar_loop_iters), th)); ! cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list, ! is_gimple_condexpr, NULL_TREE); if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)) vect_create_cond_for_align_checks (loop_vinfo, &cond_expr, --- 2510,2529 ---- basic_block new_exit_bb; edge new_exit_e, e; gimple orig_phi, new_phi; ! tree cond_expr = NULL_TREE; gimple_seq cond_expr_stmt_list = NULL; tree arg; unsigned prob = 4 * REG_BR_PROB_BASE / 5; gimple_seq gimplify_stmt_list = NULL; tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); ! if (check_profitability) ! { ! cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters, ! build_int_cst (TREE_TYPE (scalar_loop_iters), th)); ! cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list, ! is_gimple_condexpr, NULL_TREE); ! } if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)) vect_create_cond_for_align_checks (loop_vinfo, &cond_expr, Index: gcc/tree-vect-loop.c =================================================================== *** gcc/tree-vect-loop.c (revision 187316) --- gcc/tree-vect-loop.c (working copy) *************** vect_transform_loop (loop_vec_info loop_ *** 5227,5251 **** bool grouped_store; bool slp_scheduled = false; unsigned int nunits; - tree cond_expr = NULL_TREE; - gimple_seq cond_expr_stmt_list = NULL; gimple stmt, pattern_stmt; gimple_seq pattern_def_seq = NULL; gimple_stmt_iterator pattern_def_si = gsi_none (); bool transform_pattern_stmt = false; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vec_transform_loop ==="); /* Peel the loop if there are data refs with unknown alignment. Only one data ref with unknown store is allowed. */ if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) ! 
vect_do_peeling_for_alignment (loop_vinfo); if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) ! vect_loop_versioning (loop_vinfo); /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a compile time constant), or it is a constant that doesn't divide by the --- 5227,5274 ---- bool grouped_store; bool slp_scheduled = false; unsigned int nunits; gimple stmt, pattern_stmt; gimple_seq pattern_def_seq = NULL; gimple_stmt_iterator pattern_def_si = gsi_none (); bool transform_pattern_stmt = false; + bool check_profitability; + int th; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vec_transform_loop ==="); + /* Use the more conservative vectorization threshold. If the number + of iterations is constant assume the cost check has been performed + by our caller. If the threshold makes all loops profitable that + run at least the vectorization factor number of times checking + is pointless, too. */ + th = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) + * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1); + th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo)); + if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1 + && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + { + if (vect_print_dump_info (REPORT_COST)) + fprintf (vect_dump, + "Profitability threshold is %d loop iterations.", th); + check_profitability = true; + } + /* Peel the loop if there are data refs with unknown alignment. Only one data ref with unknown store is allowed. */ if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) ! { ! vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability); ! check_profitability = false; ! } if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) ! { ! vect_loop_versioning (loop_vinfo, th, check_profitability); ! check_profitability = false; ! } /* If the loop has a symbolic number of iterations 'n' (i.e. 
it's not a compile time constant), or it is a constant that doesn't divide by the *************** vect_transform_loop (loop_vec_info loop_ *** 5260,5266 **** && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0) || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, ! cond_expr, cond_expr_stmt_list); else ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); --- 5283,5289 ---- && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0) || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, ! th, check_profitability); else ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);