Hi, this patch commonizes the maximal iteration estimate logic in between SCEV and loop-iv. Both are now using loop->nb_iterations_upper_bound. I decided to keep same API for SCEV code as for RTL code, so I made estimated_loop_iterations and max_loop_iterations to not try to recompute bounds and ICE when invoked without SCEV fired on.
The patch updates RTL optimizers to use the estimated_loop_iterations and max_loop_iterations. This has few advantages: 1) loop unroller can now take into account estimates stored into loop structure by earlier pass (I think none exist though) It is however better then using expected_loop_iterations since profile might get out of date with expansion. 2) loop peeling code now use max iterations bounds. This makes it i.e. to peel vectorizer prologues/epilogues/scalar loops so -fpeel-loops now improves my low iteration count testcase by about 10% 3) Same for loop unswithcing. I am not really friend with the new double_int API. I copied some existing examples but find it ugly. Why do we miss operators for comparsions and division? Why from_*/to_* can't be a cast at least for basic integer types? Regtested/bootstrapped x86_64-linux, seems sane? I also wonder if loop vectorizer should not update the estimates after loop iteration count is reduced by vectorizing. Honza * loop-unswitch.c (unswitch_single_loop): Use estimated_loop_iterations_int to prevent unswitching when loop is known to not roll. * tree-ssa-loop-niter.c (estimated_loop_iterations): Do not segfault when SCEV is not initialized. (max_loop_iterations): Likewise. * tree-ssa-loop-unswitch.c (tree_ssa_unswitch_loops): Use estimated_loop_iterations_int to prevent unswithcing when loop is known to not roll. * tree-scalar-evolution.c (scev_initialized_p): New function. * tree-scalar-evolution.h (scev_initialized_p): Likewise. * loop-unroll.c (decide_peel_once_rolling): Use max_loop_iterations_int. (unroll_loop_constant_iterations): Update nb_iterations_upper_bound and nb_iterations_estimate. (decide_unroll_runtime_iterations): Use estimated_loop_iterations or max_loop_iterations; (unroll_loop_runtime_iterations): fix profile updating. (decide_peel_simple): Use estimated_loop_iterations and max_loop_iterations. (decide_unroll_stupid): Use estimated_loop_iterations ad max_loop_iterations. * loop-doloop.c (doloop_modify): Use max_loop_iterations_int. (doloop_optimize): Likewise. * loop-iv.c (iv_number_of_iterations): Use record_niter_bound. (find_simple_exit): Likewise. * cfgloop.h (struct niter_desc): Remove niter_max. Index: loop-unswitch.c =================================================================== *** loop-unswitch.c (revision 191867) --- loop-unswitch.c (working copy) *************** unswitch_single_loop (struct loop *loop, *** 257,262 **** --- 257,263 ---- rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn; int repeat; edge e; + HOST_WIDE_INT iterations; /* Do not unswitch too much. */ if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL)) *************** unswitch_single_loop (struct loop *loop, *** 299,305 **** } /* Nor if the loop usually does not roll. */ ! if (expected_loop_iterations (loop) < 1) { if (dump_file) fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n"); --- 300,307 ---- } /* Nor if the loop usually does not roll. */ ! iterations = estimated_loop_iterations_int (loop); ! if (iterations >= 0 && iterations <= 1) { if (dump_file) fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n"); Index: tree-ssa-loop-niter.c =================================================================== *** tree-ssa-loop-niter.c (revision 191867) --- tree-ssa-loop-niter.c (working copy) *************** estimate_numbers_of_iterations_loop (str *** 3012,3020 **** bool estimated_loop_iterations (struct loop *loop, double_int *nit) { ! estimate_numbers_of_iterations_loop (loop); if (!loop->any_estimate) ! return false; *nit = loop->nb_iterations_estimate; return true; --- 3012,3034 ---- bool estimated_loop_iterations (struct loop *loop, double_int *nit) { ! /* When SCEV information is available, try to update loop iterations ! estimate. Otherwise just return whatever we recorded earlier. */ ! if (scev_initialized_p ()) ! estimate_numbers_of_iterations_loop (loop); ! ! /* Even if the bound is not recorded, possibly we can derrive one from ! profile. */ if (!loop->any_estimate) ! { ! if (loop->header->count) ! { ! *nit = gcov_type_to_double_int ! (expected_loop_iterations_unbounded (loop) + 1); ! return true; ! } ! return false; ! } *nit = loop->nb_iterations_estimate; return true; *************** estimated_loop_iterations (struct loop * *** 3027,3033 **** bool max_loop_iterations (struct loop *loop, double_int *nit) { ! estimate_numbers_of_iterations_loop (loop); if (!loop->any_upper_bound) return false; --- 3041,3050 ---- bool max_loop_iterations (struct loop *loop, double_int *nit) { ! /* When SCEV information is available, try to update loop iterations ! estimate. Otherwise just return whatever we recorded earlier. */ ! if (scev_initialized_p ()) ! estimate_numbers_of_iterations_loop (loop); if (!loop->any_upper_bound) return false; Index: tree-ssa-loop-unswitch.c =================================================================== *** tree-ssa-loop-unswitch.c (revision 191867) --- tree-ssa-loop-unswitch.c (working copy) *************** tree_ssa_unswitch_loops (void) *** 78,83 **** --- 78,84 ---- loop_iterator li; struct loop *loop; bool changed = false; + HOST_WIDE_INT iterations; /* Go through inner loops (only original ones). */ FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST) *************** tree_ssa_unswitch_loops (void) *** 102,107 **** --- 103,118 ---- continue; } + /* If the loop is not expected to iterate, there is no need + for unswitching. */ + iterations = estimated_loop_iterations_int (loop); + if (iterations >= 0 && iterations <= 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Not unswitching, loop is not expected to iterate\n"); + continue; + } + changed |= tree_unswitch_single_loop (loop, 0); } Index: tree-scalar-evolution.c =================================================================== *** tree-scalar-evolution.c (revision 191867) --- tree-scalar-evolution.c (working copy) *************** scev_initialize (void) *** 3124,3129 **** --- 3124,3137 ---- } } + /* Return true if SCEV is initialized. */ + + bool + scev_initialized_p (void) + { + return scalar_evolution_info != NULL; + } + /* Cleans up the information cached by the scalar evolutions analysis in the hash table. */ Index: tree-scalar-evolution.h =================================================================== *** tree-scalar-evolution.h (revision 191867) --- tree-scalar-evolution.h (working copy) *************** extern tree number_of_exit_cond_executio *** 27,32 **** --- 27,33 ---- extern gimple get_loop_exit_condition (const struct loop *); extern void scev_initialize (void); + extern bool scev_initialized_p (void); extern void scev_reset (void); extern void scev_reset_htab (void); extern void scev_finalize (void); Index: loop-unroll.c =================================================================== *** loop-unroll.c (revision 191867) --- loop-unroll.c (working copy) *************** decide_peel_once_rolling (struct loop *l *** 341,347 **** || desc->assumptions || desc->infinite || !desc->const_iter ! || desc->niter != 0) { if (dump_file) fprintf (dump_file, --- 341,348 ---- || desc->assumptions || desc->infinite || !desc->const_iter ! || (desc->niter != 0 ! && max_loop_iterations_int (loop) != 0)) { if (dump_file) fprintf (dump_file, *************** unroll_loop_constant_iterations (struct *** 695,701 **** desc->noloop_assumptions = NULL_RTX; desc->niter -= exit_mod; ! desc->niter_max -= exit_mod; } SET_BIT (wont_exit, 1); --- 696,708 ---- desc->noloop_assumptions = NULL_RTX; desc->niter -= exit_mod; ! loop->nb_iterations_upper_bound -= double_int::from_uhwi (exit_mod); ! if (loop->any_estimate ! && double_int::from_uhwi (exit_mod).ule ! (loop->nb_iterations_estimate)) ! loop->nb_iterations_estimate -= double_int::from_uhwi (exit_mod); ! else ! loop->any_estimate = false; } SET_BIT (wont_exit, 1); *************** unroll_loop_constant_iterations (struct *** 733,739 **** apply_opt_in_copies (opt_info, exit_mod + 1, false, false); desc->niter -= exit_mod + 1; ! desc->niter_max -= exit_mod + 1; desc->noloop_assumptions = NULL_RTX; SET_BIT (wont_exit, 0); --- 740,751 ---- apply_opt_in_copies (opt_info, exit_mod + 1, false, false); desc->niter -= exit_mod + 1; ! if (loop->any_estimate ! && double_int::from_uhwi (exit_mod + 1).ule ! (loop->nb_iterations_estimate)) ! loop->nb_iterations_estimate -= double_int::from_uhwi (exit_mod + 1); ! else ! loop->any_estimate = false; desc->noloop_assumptions = NULL_RTX; SET_BIT (wont_exit, 0); *************** unroll_loop_constant_iterations (struct *** 782,788 **** } desc->niter /= max_unroll + 1; ! desc->niter_max /= max_unroll + 1; desc->niter_expr = GEN_INT (desc->niter); /* Remove the edges. */ --- 794,808 ---- } desc->niter /= max_unroll + 1; ! loop->nb_iterations_upper_bound ! = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (exit_mod ! + 1), ! FLOOR_DIV_EXPR); ! if (loop->any_estimate) ! loop->nb_iterations_estimate ! = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (exit_mod ! + 1), ! FLOOR_DIV_EXPR); desc->niter_expr = GEN_INT (desc->niter); /* Remove the edges. */ *************** decide_unroll_runtime_iterations (struct *** 803,808 **** --- 823,829 ---- { unsigned nunroll, nunroll_by_av, i; struct niter_desc *desc; + double_int iterations; if (!(flags & UAP_UNROLL)) { *************** decide_unroll_runtime_iterations (struct *** 856,864 **** } /* If we have profile feedback, check whether the loop rolls. */ ! if ((loop->header->count ! && expected_loop_iterations (loop) < 2 * nunroll) ! || desc->niter_max < 2 * nunroll) { if (dump_file) fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); --- 877,886 ---- } /* If we have profile feedback, check whether the loop rolls. */ ! if ((estimated_loop_iterations (loop, &iterations) ! || max_loop_iterations (loop, &iterations)) ! && iterations.fits_shwi () ! && iterations.to_shwi () <= 2 * nunroll) { if (dump_file) fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); *************** unroll_loop_runtime_iterations (struct l *** 1092,1097 **** --- 1114,1120 ---- single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p; e = make_edge (swtch, preheader, single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); + e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p); e->probability = p; } *************** unroll_loop_runtime_iterations (struct l *** 1111,1116 **** --- 1134,1140 ---- single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p; e = make_edge (swtch, preheader, single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); + e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p); e->probability = p; } *************** unroll_loop_runtime_iterations (struct l *** 1172,1184 **** desc->niter_expr = simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1)); ! desc->niter_max /= max_unroll + 1; if (exit_at_end) { desc->niter_expr = simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); desc->noloop_assumptions = NULL_RTX; ! desc->niter_max--; } if (dump_file) --- 1196,1221 ---- desc->niter_expr = simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1)); ! loop->nb_iterations_upper_bound ! = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (max_unroll ! + 1), ! FLOOR_DIV_EXPR); ! if (loop->any_estimate) ! loop->nb_iterations_estimate ! = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (max_unroll ! + 1), ! FLOOR_DIV_EXPR); if (exit_at_end) { desc->niter_expr = simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); desc->noloop_assumptions = NULL_RTX; ! --loop->nb_iterations_upper_bound; ! if (loop->any_estimate ! && loop->nb_iterations_estimate != double_int_zero) ! --loop->nb_iterations_estimate; ! else ! loop->any_estimate = false; } if (dump_file) *************** decide_peel_simple (struct loop *loop, i *** 1196,1201 **** --- 1233,1239 ---- { unsigned npeel; struct niter_desc *desc; + double_int iterations; if (!(flags & UAP_PEEL)) { *************** decide_peel_simple (struct loop *loop, i *** 1239,1261 **** return; } ! if (loop->header->count) { ! unsigned niter = expected_loop_iterations (loop); ! if (niter + 1 > npeel) { if (dump_file) { fprintf (dump_file, ";; Not peeling loop, rolls too much ("); fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, ! (HOST_WIDEST_INT) (niter + 1)); fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel); } return; } ! npeel = niter + 1; } else { /* For now we have no good heuristics to decide whether loop peeling --- 1277,1306 ---- return; } ! /* If we have realistic estimate on number of iterations, use it. */ ! if (estimated_loop_iterations (loop, &iterations)) { ! if (!iterations.fits_shwi () ! || iterations.to_shwi () + 1 > npeel) { if (dump_file) { fprintf (dump_file, ";; Not peeling loop, rolls too much ("); fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, ! (HOST_WIDEST_INT) (iterations.to_shwi () + 1)); fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel); } return; } ! npeel = iterations.to_shwi () + 1; } + /* If we have small enough bound on iterations, we can still peel (completely + unroll). */ + else if (max_loop_iterations (loop, &iterations) + && iterations.fits_shwi () + && iterations.to_shwi () + 1 <= npeel) + npeel = iterations.to_shwi () + 1; else { /* For now we have no good heuristics to decide whether loop peeling *************** decide_unroll_stupid (struct loop *loop, *** 1349,1354 **** --- 1394,1400 ---- { unsigned nunroll, nunroll_by_av, i; struct niter_desc *desc; + double_int iterations; if (!(flags & UAP_UNROLL_ALL)) { *************** decide_unroll_stupid (struct loop *loop, *** 1401,1409 **** } /* If we have profile feedback, check whether the loop rolls. */ ! if ((loop->header->count ! && expected_loop_iterations (loop) < 2 * nunroll) ! || desc->niter_max < 2 * nunroll) { if (dump_file) fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); --- 1447,1456 ---- } /* If we have profile feedback, check whether the loop rolls. */ ! if ((estimated_loop_iterations (loop, &iterations) ! || max_loop_iterations (loop, &iterations)) ! && iterations.fits_shwi () ! && iterations.to_shwi () <= 2 * nunroll) { if (dump_file) fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); Index: loop-doloop.c =================================================================== *** loop-doloop.c (revision 191867) --- loop-doloop.c (working copy) *************** doloop_modify (struct loop *loop, struct *** 410,415 **** --- 410,416 ---- basic_block loop_end = desc->out_edge->src; enum machine_mode mode; rtx true_prob_val; + HOST_WIDE_INT iterations; jump_insn = BB_END (loop_end); *************** doloop_modify (struct loop *loop, struct *** 460,468 **** /* Determine if the iteration counter will be non-negative. Note that the maximum value loaded is iterations_max - 1. */ ! if (desc->niter_max ! <= ((unsigned HOST_WIDEST_INT) 1 ! << (GET_MODE_PRECISION (mode) - 1))) nonneg = 1; break; --- 461,471 ---- /* Determine if the iteration counter will be non-negative. Note that the maximum value loaded is iterations_max - 1. */ ! iterations = max_loop_iterations_int (loop); ! if (iterations >= 0 ! && (iterations ! <= ((unsigned HOST_WIDEST_INT) 1 ! << (GET_MODE_PRECISION (mode) - 1)))) nonneg = 1; break; *************** doloop_modify (struct loop *loop, struct *** 548,556 **** { rtx init; unsigned level = get_loop_level (loop) + 1; init = gen_doloop_begin (counter_reg, desc->const_iter ? desc->niter_expr : const0_rtx, ! GEN_INT (desc->niter_max), GEN_INT (level)); if (init) { --- 551,567 ---- { rtx init; unsigned level = get_loop_level (loop) + 1; + double_int iter; + rtx iter_rtx; + + if (!max_loop_iterations (loop, &iter) + || !iter.fits_shwi ()) + iter_rtx = const0_rtx; + else + iter_rtx = GEN_INT (iter.to_shwi()); init = gen_doloop_begin (counter_reg, desc->const_iter ? desc->niter_expr : const0_rtx, ! iter_rtx, GEN_INT (level)); if (init) { *************** doloop_optimize (struct loop *loop) *** 608,613 **** --- 619,625 ---- struct niter_desc *desc; unsigned word_mode_size; unsigned HOST_WIDE_INT word_mode_max; + double_int iter; if (dump_file) fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num); *************** doloop_optimize (struct loop *loop) *** 658,664 **** count = copy_rtx (desc->niter_expr); iterations = desc->const_iter ? desc->niter_expr : const0_rtx; ! iterations_max = GEN_INT (desc->niter_max); level = get_loop_level (loop) + 1; /* Generate looping insn. If the pattern FAILs then give up trying --- 670,680 ---- count = copy_rtx (desc->niter_expr); iterations = desc->const_iter ? desc->niter_expr : const0_rtx; ! if (!max_loop_iterations (loop, &iter) ! || !iter.fits_shwi ()) ! iterations_max = const0_rtx; ! else ! iterations_max = GEN_INT (iter.to_shwi()); level = get_loop_level (loop) + 1; /* Generate looping insn. If the pattern FAILs then give up trying *************** doloop_optimize (struct loop *loop) *** 678,684 **** computed, we must be sure that the number of iterations fits into the new mode. */ && (word_mode_size >= GET_MODE_PRECISION (mode) ! || desc->niter_max <= word_mode_max)) { if (word_mode_size > GET_MODE_PRECISION (mode)) { --- 694,700 ---- computed, we must be sure that the number of iterations fits into the new mode. */ && (word_mode_size >= GET_MODE_PRECISION (mode) ! || iter <= double_int::from_shwi (word_mode_max))) { if (word_mode_size > GET_MODE_PRECISION (mode)) { Index: loop-iv.c =================================================================== *** loop-iv.c (revision 191867) --- loop-iv.c (working copy) *************** iv_number_of_iterations (struct loop *lo *** 2293,2302 **** desc->const_iter = false; desc->niter_expr = NULL_RTX; - desc->niter_max = 0; - if (loop->any_upper_bound - && loop->nb_iterations_upper_bound.fits_uhwi ()) - desc->niter_max = loop->nb_iterations_upper_bound.low; cond = GET_CODE (condition); gcc_assert (COMPARISON_P (condition)); --- 2293,2298 ---- *************** iv_number_of_iterations (struct loop *lo *** 2566,2574 **** ? iv0.base : mode_mmin); max = (up - down) / inc + 1; ! if (!desc->niter_max ! || max < desc->niter_max) ! desc->niter_max = max; if (iv0.step == const0_rtx) { --- 2562,2569 ---- ? iv0.base : mode_mmin); max = (up - down) / inc + 1; ! record_niter_bound (loop, double_int::from_shwi (max), ! false, true); if (iv0.step == const0_rtx) { *************** iv_number_of_iterations (struct loop *lo *** 2779,2792 **** unsigned HOST_WIDEST_INT val = INTVAL (desc->niter_expr); desc->const_iter = true; ! desc->niter_max = desc->niter = val & GET_MODE_MASK (desc->mode); } else { max = determine_max_iter (loop, desc, old_niter); ! if (!desc->niter_max ! || max < desc->niter_max) ! desc->niter_max = max; /* simplify_using_initial_values does a copy propagation on the registers in the expression for the number of iterations. This prolongs life --- 2774,2789 ---- unsigned HOST_WIDEST_INT val = INTVAL (desc->niter_expr); desc->const_iter = true; ! desc->niter = val & GET_MODE_MASK (desc->mode); ! record_niter_bound (loop, double_int::from_shwi (desc->niter), ! false, true); } else { max = determine_max_iter (loop, desc, old_niter); ! gcc_assert (max); ! record_niter_bound (loop, double_int::from_shwi (max), ! false, true); /* simplify_using_initial_values does a copy propagation on the registers in the expression for the number of iterations. This prolongs life *************** zero_iter_simplify: *** 2811,2817 **** zero_iter: desc->const_iter = true; desc->niter = 0; ! desc->niter_max = 0; desc->noloop_assumptions = NULL_RTX; desc->niter_expr = const0_rtx; return; --- 2808,2815 ---- zero_iter: desc->const_iter = true; desc->niter = 0; ! record_niter_bound (loop, double_int_zero, ! true, true); desc->noloop_assumptions = NULL_RTX; desc->niter_expr = const0_rtx; return; *************** find_simple_exit (struct loop *loop, str *** 2945,2953 **** print_rtl (dump_file, desc->niter_expr); fprintf (dump_file, "\n"); ! fprintf (dump_file, " upper bound: "); ! fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter_max); ! fprintf (dump_file, "\n"); } else fprintf (dump_file, "Loop %d is not simple.\n", loop->num); --- 2943,2952 ---- print_rtl (dump_file, desc->niter_expr); fprintf (dump_file, "\n"); ! fprintf (dump_file, " upper bound: %li\n", ! (long)max_loop_iterations_int (loop)); ! fprintf (dump_file, " realistic bound: %li\n", ! (long)estimated_loop_iterations_int (loop)); } else fprintf (dump_file, "Loop %d is not simple.\n", loop->num); Index: cfgloop.h =================================================================== *** cfgloop.h (revision 191867) --- cfgloop.h (working copy) *************** struct niter_desc *** 386,394 **** /* Number of iterations if constant. */ unsigned HOST_WIDEST_INT niter; - /* Upper bound on the number of iterations. */ - unsigned HOST_WIDEST_INT niter_max; - /* Assumptions under that the rest of the information is valid. */ rtx assumptions; --- 386,391 ----