Hi Diego, Thanks for the quick feedback. Here is a an updated version of the patch.
2011-04-27 Sharad Singhai <sing...@google.com> ChangeLog.google-main * params.def: Add new parameters to control peeling. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Use different peeling parameters when profile feedback is available. * loop-unroll.c (decide_peel_once_rolling): Ditto. (decide_peel_completely): Ditto. * doc/invoke.texi: Document new peeling parameters. testsuite/ChangeLog.google-main * gcc.dg/vect/O3-vect-pr34223.c: Add new peeling parameters. * gcc.dg/vect/vect.exp: Allow reading flags in individual tests. Index: doc/invoke.texi =================================================================== --- doc/invoke.texi (revision 172904) +++ doc/invoke.texi (working copy) @@ -8523,11 +8523,28 @@ The maximum number of peelings of a single loop. @item max-completely-peeled-insns +@item max-completely-peeled-insns-feedback The maximum number of insns of a completely peeled loop. +The @option{max-completely-peeled-insns-feedback} is used only when profile +feedback is available and the loop is hot. Because of the real profiles, this +value may set to be larger for hot loops. + +@item max-once-peeled-insns +@item max-once-peeled-insns-feedback +The maximum number of insns of a peeled loop that rolls only once. +The @option{max-once-peeled-insns-feedback} is used only when profile feedback +is available and the loop is hot. Because of the real profiles, this value +may set to be larger for hot loops. + @item max-completely-peel-times +@item max-completely-peel-times-feedback The maximum number of iterations of a loop to be suitable for complete peeling. +The @option{max-completely-peel-times-feedback} is used only when profile feedback +is available and the loop is hot. Because of the real profiles, this value may +set to be larger for hot loops. + @item max-completely-peel-loop-nest-depth The maximum depth of a loop nest suitable for complete peeling. Index: testsuite/gcc.dg/vect/O3-vect-pr34223.c =================================================================== --- testsuite/gcc.dg/vect/O3-vect-pr34223.c (revision 172904) +++ testsuite/gcc.dg/vect/O3-vect-pr34223.c (working copy) @@ -1,4 +1,5 @@ /* { dg-require-effective-target vect_int } */ +/* { dg-options "[vect_cflags] --param max-completely-peel-times=16" } */ #include "tree-vect.h" Index: testsuite/gcc.dg/vect/vect.exp =================================================================== --- testsuite/gcc.dg/vect/vect.exp (revision 172904) +++ testsuite/gcc.dg/vect/vect.exp (working copy) @@ -24,6 +24,12 @@ global DEFAULT_VECTCFLAGS set DEFAULT_VECTCFLAGS "" +# So that we can read flags in individual tests. +proc vect_cflags { } { + global DEFAULT_VECTCFLAGS + return $DEFAULT_VECTCFLAGS +} + # If the target system supports vector instructions, the default action # for a test is 'run', otherwise it's 'compile'. Save current default. # Executing vector instructions on a system without hardware vector support Index: tree-ssa-loop-ivcanon.c =================================================================== --- tree-ssa-loop-ivcanon.c (revision 172904) +++ tree-ssa-loop-ivcanon.c (working copy) @@ -326,6 +326,7 @@ enum unroll_level ul) { unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns; + unsigned HOST_WIDE_INT max_peeled_insns; gimple cond; struct loop_size size; @@ -336,9 +337,23 @@ return false; n_unroll = tree_low_cst (niter, 1); - max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); + if (profile_info + && flag_branch_probabilities + && optimize_loop_for_speed_p (loop)) + max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK); + else + max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); + if (n_unroll > max_unroll) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " Not unrolling loop %d limited by max unroll" + " (%d > %d)\n", + loop->num, (int) n_unroll, (int) max_unroll); + } return false; + } if (n_unroll) { @@ -356,14 +371,21 @@ (int) unr_insns); } - if (unr_insns > ninsns - && (unr_insns - > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))) + if (profile_info + && flag_branch_probabilities + && optimize_loop_for_speed_p (loop)) + max_peeled_insns = + PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK); + else + max_peeled_insns = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS); + + if (unr_insns > max_peeled_insns) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Not unrolling loop %d " - "(--param max-completely-peeled-insns limit reached).\n", - loop->num); + "(--param max-completely-peeled-insns(-feedback) limit. " + "(%u > %u)).\n", + loop->num, (unsigned) unr_insns, (unsigned) max_peeled_insns); return false; } @@ -371,7 +393,8 @@ && unr_insns > ninsns) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d.\n", loop->num); + fprintf (dump_file, "Not unrolling loop %d (NO_GROWTH %d > %d).\n", + loop->num, (int) unr_insns, (int) ninsns); return false; } } @@ -418,8 +441,9 @@ update_stmt (cond); update_ssa (TODO_update_ssa); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num); + if (dump_file) + fprintf (dump_file, "Unrolled loop %d completely by factor %d.\n", + loop->num, (int) n_unroll); return true; } Index: loop-unroll.c =================================================================== --- loop-unroll.c (revision 172904) +++ loop-unroll.c (working copy) @@ -324,15 +324,23 @@ decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) { struct niter_desc *desc; + unsigned max_peeled_insns; + if (profile_info && flag_branch_probabilities) + max_peeled_insns = + (unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS_FEEDBACK); + else + max_peeled_insns = (unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS); + if (dump_file) fprintf (dump_file, "\n;; Considering peeling once rolling loop\n"); /* Is the loop small enough? */ - if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) + if (max_peeled_insns < loop->ninsns) { if (dump_file) - fprintf (dump_file, ";; Not considering loop, is too big\n"); + fprintf (dump_file, ";; Not considering loop, is too big (%d > %u)\n", + loop->ninsns, max_peeled_insns); return; } @@ -362,7 +370,7 @@ static void decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) { - unsigned npeel; + unsigned npeel, max_insns, max_peel; struct niter_desc *desc; if (dump_file) @@ -393,16 +401,30 @@ return; } + if (profile_info && flag_branch_probabilities) + { + max_insns = + (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK); + max_peel = + (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK); + } + else + { + max_insns = (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS); + max_peel = (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); + } + /* npeel = number of iterations to peel. */ - npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns; - if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) - npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); + npeel = max_insns / loop->ninsns; + if (npeel > max_peel) + npeel = max_peel; /* Is the loop small enough? */ if (!npeel) { if (dump_file) - fprintf (dump_file, ";; Not considering loop, is too big\n"); + fprintf (dump_file, ";; Not considering loop, is too big, npeel=%u.\n", + npeel); return; } @@ -435,7 +457,7 @@ /* Success. */ if (dump_file) - fprintf (dump_file, ";; Decided to peel loop completely\n"); + fprintf (dump_file, ";; Decided to peel loop completely npeel %u\n", npeel); loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; } Index: params.def =================================================================== --- params.def (revision 172904) +++ params.def (working copy) @@ -299,16 +299,37 @@ "max-completely-peeled-insns", "The maximum number of insns of a completely peeled loop", 400, 0, 0) +/* The maximum number of insns of a peeled loop, when feedback + information is available. */ +DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK, + "max-completely-peeled-insns-feedback", + "The maximum number of insns of a completely peeled loop when profile " + "feedback is available", + 600, 0, 0) /* The maximum number of peelings of a single loop that is peeled completely. */ DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES, "max-completely-peel-times", "The maximum number of peelings of a single loop that is peeled completely", - 16, 0, 0) + 8, 0, 0) +/* The maximum number of peelings of a single loop that is peeled + completely, when feedback information is available. */ +DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK, + "max-completely-peel-times-feedback", + "The maximum number of peelings of a single loop that is peeled " + "completely, when profile feedback is available", + 16, 0, 0) /* The maximum number of insns of a peeled loop that rolls only once. */ DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS, "max-once-peeled-insns", "The maximum number of insns of a peeled loop that rolls only once", 400, 0, 0) +/* The maximum number of insns of a peeled loop that rolls only once, + when feedback information is available. */ +DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS_FEEDBACK, + "max-once-peeled-insns-feedback", + "The maximum number of insns of a peeled loop that rolls only once, " + "when profile feedback is available", + 600, 0, 0) /* The maximum depth of a loop nest we completely peel. */ DEFPARAM(PARAM_MAX_UNROLL_ITERATIONS, "max-completely-peel-loop-nest-depth", -- This patch is available for review at http://codereview.appspot.com/4438079