This finally fixes PR59058. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
Richard. 2013-12-05 Richard Biener <rguent...@suse.de> PR tree-optimization/59058 * tree-vectorizer.h (struct _loop_vec_info): Add num_itersm1 member. (LOOP_VINFO_NITERSM1): New macro. * tree-vect-loop-manip.c (slpeel_tree_peel_loop_to_edge): Express the vector loop entry test in terms of scalar latch executions. (vect_do_peeling_for_alignment): Update LOOP_VINFO_NITERSM1. * tree-vect-loop.c (vect_get_loop_niters): Also return the number of latch executions. (new_loop_vec_info): Initialize LOOP_VINFO_NITERSM1. (vect_analyze_loop_form): Likewise. (vect_generate_tmps_on_preheader): Compute the number of vectorized iterations differently. * gcc.dg/torture/pr59058.c: New testcase. Index: trunk/gcc/testsuite/gcc.dg/torture/pr59058.c =================================================================== *** /dev/null 1970-01-01 00:00:00.000000000 +0000 --- trunk/gcc/testsuite/gcc.dg/torture/pr59058.c 2013-12-05 14:51:54.231300709 +0100 *************** *** 0 **** --- 1,19 ---- + /* { dg-do run } */ + + extern void abort (void); + + short b = 0; + + int + main () + { + int c = 0; + l1: + b++; + c |= b; + if (b) + goto l1; + if (c != -1) + abort (); + return 0; + } Index: trunk/gcc/tree-vect-loop-manip.c =================================================================== *** trunk.orig/gcc/tree-vect-loop-manip.c 2013-12-05 12:27:23.000000000 +0100 --- trunk/gcc/tree-vect-loop-manip.c 2013-12-05 14:50:59.468666726 +0100 *************** slpeel_tree_peel_loop_to_edge (struct lo *** 1061,1067 **** gimple_stmt_iterator gsi; edge exit_e = single_exit (loop); source_location loop_loc; - tree cost_pre_condition = NULL_TREE; /* There are many aspects to how likely the first loop is going to be executed. Without histogram we can't really do good job. Simply set it to 2/3, so the first loop is not reordered to the end of function and --- 1061,1066 ---- *************** slpeel_tree_peel_loop_to_edge (struct lo *** 1263,1283 **** /* Epilogue peeling. */ if (!update_first_loop_count) { pre_condition = ! fold_build2 (LE_EXPR, boolean_type_node, *first_niters, ! build_int_cst (TREE_TYPE (*first_niters), 0)); ! if (check_profitability) ! { ! tree scalar_loop_iters ! = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED ! (loop_vec_info_for_loop (loop))); ! cost_pre_condition = ! fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, ! build_int_cst (TREE_TYPE (scalar_loop_iters), th)); ! ! pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, ! cost_pre_condition, pre_condition); ! } if (cond_expr) { pre_condition = --- 1262,1278 ---- /* Epilogue peeling. */ if (!update_first_loop_count) { + loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop); + tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo); + unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1; + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) + limit = limit + 1; + if (check_profitability + && th > limit) + limit = th; pre_condition = ! fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters, ! build_int_cst (TREE_TYPE (scalar_loop_iters), limit)); if (cond_expr) { pre_condition = *************** vect_do_peeling_for_alignment (loop_vec_ *** 1922,1927 **** --- 1917,1925 ---- /* Update number of times loop executes. */ LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop); + LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR, + TREE_TYPE (ni_name), + LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop); if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop))) wide_prolog_niters = niters_of_prolog_loop; Index: trunk/gcc/tree-vect-loop.c =================================================================== *** trunk.orig/gcc/tree-vect-loop.c 2013-12-05 12:27:23.000000000 +0100 --- trunk/gcc/tree-vect-loop.c 2013-12-05 14:36:14.917389587 +0100 *************** vect_analyze_scalar_cycles (loop_vec_inf *** 791,802 **** /* Function vect_get_loop_niters. Determine how many iterations the loop is executed and place it ! in NUMBER_OF_ITERATIONS. Return the loop exit condition. */ static gimple ! vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) { tree niters; --- 791,804 ---- /* Function vect_get_loop_niters. Determine how many iterations the loop is executed and place it ! in NUMBER_OF_ITERATIONS. Place the number of latch iterations ! in NUMBER_OF_ITERATIONSM1. Return the loop exit condition. */ static gimple ! vect_get_loop_niters (struct loop *loop, tree *number_of_iterations, ! tree *number_of_iterationsm1) { tree niters; *************** vect_get_loop_niters (struct loop *loop, *** 805,816 **** "=== get_loop_niters ===\n"); niters = number_of_latch_executions (loop); /* We want the number of loop header executions which is the number of latch executions plus one. ??? For UINT_MAX latch executions this number overflows to zero for loops like do { n++; } while (n != 0); */ if (niters && !chrec_contains_undetermined (niters)) ! niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters, build_int_cst (TREE_TYPE (niters), 1)); *number_of_iterations = niters; --- 807,820 ---- "=== get_loop_niters ===\n"); niters = number_of_latch_executions (loop); + *number_of_iterationsm1 = niters; + /* We want the number of loop header executions which is the number of latch executions plus one. ??? For UINT_MAX latch executions this number overflows to zero for loops like do { n++; } while (n != 0); */ if (niters && !chrec_contains_undetermined (niters)) ! niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters), build_int_cst (TREE_TYPE (niters), 1)); *number_of_iterations = niters; *************** new_loop_vec_info (struct loop *loop) *** 916,921 **** --- 920,926 ---- gcc_assert (nbbs == loop->num_nodes); LOOP_VINFO_BBS (res) = bbs; + LOOP_VINFO_NITERSM1 (res) = NULL; LOOP_VINFO_NITERS (res) = NULL; LOOP_VINFO_NITERS_UNCHANGED (res) = NULL; LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0; *************** vect_analyze_loop_form (struct loop *loo *** 1071,1077 **** { loop_vec_info loop_vinfo; gimple loop_cond; ! tree number_of_iterations = NULL; loop_vec_info inner_loop_vinfo = NULL; if (dump_enabled_p ()) --- 1076,1082 ---- { loop_vec_info loop_vinfo; gimple loop_cond; ! tree number_of_iterations = NULL, number_of_iterationsm1 = NULL; loop_vec_info inner_loop_vinfo = NULL; if (dump_enabled_p ()) *************** vect_analyze_loop_form (struct loop *loo *** 1246,1252 **** } } ! loop_cond = vect_get_loop_niters (loop, &number_of_iterations); if (!loop_cond) { if (dump_enabled_p ()) --- 1251,1258 ---- } } ! loop_cond = vect_get_loop_niters (loop, &number_of_iterations, ! &number_of_iterationsm1); if (!loop_cond) { if (dump_enabled_p ()) *************** vect_analyze_loop_form (struct loop *loo *** 1280,1285 **** --- 1286,1292 ---- } loop_vinfo = new_loop_vec_info (loop); + LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1; LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations; *************** vect_generate_tmps_on_preheader (loop_ve *** 5637,5648 **** tree var; tree ratio_name; tree ratio_mult_vf_name; - tree ni = LOOP_VINFO_NITERS (loop_vinfo); int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); tree log_vf; ! log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); /* If epilogue loop is required because of data accesses with gaps, we subtract one iteration from the total number of iterations here for --- 5644,5654 ---- tree var; tree ratio_name; tree ratio_mult_vf_name; int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); tree log_vf; ! log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf)); /* If epilogue loop is required because of data accesses with gaps, we subtract one iteration from the total number of iterations here for *************** vect_generate_tmps_on_preheader (loop_ve *** 5654,5660 **** build_one_cst (TREE_TYPE (ni_name))); if (!is_gimple_val (ni_minus_gap_name)) { ! var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); gimple stmts = NULL; ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, true, var); --- 5660,5666 ---- build_one_cst (TREE_TYPE (ni_name))); if (!is_gimple_val (ni_minus_gap_name)) { ! var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap"); gimple stmts = NULL; ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, true, var); *************** vect_generate_tmps_on_preheader (loop_ve *** 5665,5676 **** ni_minus_gap_name = ni_name; /* Create: ratio = ni >> log2(vf) */ ! ! ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name), ! ni_minus_gap_name, log_vf); if (!is_gimple_val (ratio_name)) { ! var = create_tmp_var (TREE_TYPE (ni), "bnd"); gimple stmts = NULL; ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); gsi_insert_seq_on_edge_immediate (pe, stmts); --- 5671,5692 ---- ni_minus_gap_name = ni_name; /* Create: ratio = ni >> log2(vf) */ ! /* ??? As we have ni == number of latch executions + 1, ni could ! have overflown to zero. So avoid computing ratio based on ni ! but compute it using the fact that we know ratio will be at least ! one, thus via (ni - vf) >> log2(vf) + 1. */ ! ratio_name ! = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name), ! fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ! fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name), ! ni_minus_gap_name, ! build_int_cst ! (TREE_TYPE (ni_name), vf)), ! log_vf), ! build_int_cst (TREE_TYPE (ni_name), 1)); if (!is_gimple_val (ratio_name)) { ! var = create_tmp_var (TREE_TYPE (ni_name), "bnd"); gimple stmts = NULL; ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); gsi_insert_seq_on_edge_immediate (pe, stmts); *************** vect_generate_tmps_on_preheader (loop_ve *** 5685,5691 **** ratio_name, log_vf); if (!is_gimple_val (ratio_mult_vf_name)) { ! var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); gimple stmts = NULL; ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, true, var); --- 5701,5707 ---- ratio_name, log_vf); if (!is_gimple_val (ratio_mult_vf_name)) { ! var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf"); gimple stmts = NULL; ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, true, var); Index: trunk/gcc/tree-vectorizer.h =================================================================== *** trunk.orig/gcc/tree-vectorizer.h 2013-11-28 13:44:24.000000000 +0100 --- trunk/gcc/tree-vectorizer.h 2013-12-05 14:34:07.955901102 +0100 *************** typedef struct _loop_vec_info { *** 250,257 **** --- 250,260 ---- /* The loop basic blocks. */ basic_block *bbs; + /* Number of latch executions. */ + tree num_itersm1; /* Number of iterations. */ tree num_iters; + /* Number of iterations of the original loop. */ tree num_iters_unchanged; /* Minimum number of iterations below which vectorization is expected to *************** typedef struct _loop_vec_info { *** 349,357 **** /* Access Functions. */ #define LOOP_VINFO_LOOP(L) (L)->loop #define LOOP_VINFO_BBS(L) (L)->bbs #define LOOP_VINFO_NITERS(L) (L)->num_iters ! /* Since LOOP_VINFO_NITERS can change after prologue peeling ! retain total unchanged scalar loop iterations for cost model. */ #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged #define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable --- 352,362 ---- /* Access Functions. */ #define LOOP_VINFO_LOOP(L) (L)->loop #define LOOP_VINFO_BBS(L) (L)->bbs + #define LOOP_VINFO_NITERSM1(L) (L)->num_itersm1 #define LOOP_VINFO_NITERS(L) (L)->num_iters ! /* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after ! prologue peeling retain total unchanged scalar loop iterations for ! cost model. */ #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged #define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable