This is a draft patch to partially address the concerns described in bugzilla problem report https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68212). The patch is incomplete in the sense that there are some known shortcomings with nested loops which I am still working on. I am sending this out for comments at this time because we would like these patches to be integrated into the GCC 6 release and want to begin responding to community feedback as soon as possible in order to make the integration possible.

The problem described in Bugzilla 68212 is that code produced by loop unrolling has incorrect block frequencies. The erroneous block frequencies result because block frequencies are not adjusted to account for the execution contexts into which they are copied. These incorrect frequencies disable and confuse subsequent compiler optimizations. The general idea of how we address this problem is two fold:

1. Before a loop body is unpeeled into a pre-header location, we temporarily adjust the loop body frequencies to represent the values appropriate for the context into which the loop body is to be copied.

2. After unrolling the loop body (by replicating the loop body (N-1) times within the loop), we recompute all frequencies associated with blocks contained within the loop.

Additional test programs will be added to the bugzilla report and will be integrated into the regression test suite as part of the final submission of this patch.

ChangeLog:

2015-11-07  Kelvin Nilsen <kel...@gcc.gnu.org>

        * cfgloopmanip.h
        (in_loop_p): new extern declaration
        (zero_loop_frequencies): new extern declaration
        (increment_loop_frequencies): new extern declaration

        * cfgloopmanip.c
        (in_loop_p): new helper routine
        (zero_loop_frequencies): new helper routine
        (block_ladder_rung): new struct definition for helper routines
        (same_edge_p): new helper routine
        (in_edge_set_p): new helper routine
        (in_call_chain_p): new helper routine
        (recursively_zero_frequency): new helper routine
        (recursion_detected_p): new helper routine
        (in_loop_of_header_p): new helper routine
        (recursively_get_loop_blocks): new helper routine
        (get_loop_blocks): new helper routine
        (in_block_set_p): new helper routine
        (get_exit_edges_from_loop_blocks): new helper routine
        (zero_partial_loop_frequencies): new helper routine
        (recursively_increment_frequency): new helper routine
        (increment_loop_frequencies): new helper routine
        (internal): new helper routine
        (check_loop_frequency_integrity): new helper routine
        (set_zero_probability): added another parameter
(duplicate_loop_to_header_edge): Add code to recompute loop body frequencies after blocks are replicated (unrolled) into the loop body. Introduce certain help routines because existing infrastructure routines are not reliable during typical executions of duplicate_loop_to_header_edge().

        * loop-unroll.c
(unroll_loop_constant_iterations): After replicating the loop body within a loop, recompute the frequencies for all blocks contained within the loop. (unroll_loop_runtime_iterations):Before copying loop body to preheader location, temporarily adjust the loop body frequencies to represent the context into which the loop body will be copied. After replicating the loop body within a loop, recompute the frequencies for all blocks contained within the loop.

Index: loop-unroll.c
===================================================================
--- loop-unroll.c       (.../trunk/gcc) (revision 229257)
+++ loop-unroll.c       (.../branches/ibm/kelvin-1/gcc) (working copy)
@@ -587,14 +587,14 @@ unroll_loop_constant_iterations (struct loop *loop
   /* Now unroll the loop.  */
 
   opt_info_start_duplication (opt_info);
+
   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
                                      max_unroll,
                                      wont_exit, desc->out_edge,
                                      &remove_edges,
-                                     DLTHE_FLAG_UPDATE_FREQ
-                                     | (opt_info
+                                     opt_info
                                         ? DLTHE_RECORD_COPY_NUMBER
-                                          : 0));
+                                          : 0);
   gcc_assert (ok);
 
   if (opt_info)
@@ -876,6 +876,7 @@ unroll_loop_runtime_iterations (struct loop *loop)
   auto_vec<basic_block> dom_bbs;
 
   body = get_loop_body (loop);
+
   for (i = 0; i < loop->num_nodes; i++)
     {
       vec<basic_block> ldom;
@@ -943,6 +944,7 @@ unroll_loop_runtime_iterations (struct loop *loop)
       && !desc->noloop_assumptions)
     bitmap_set_bit (wont_exit, 1);
   ezc_swtch = loop_preheader_edge (loop)->src;
+
   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                      1, wont_exit, desc->out_edge,
                                      &remove_edges,
@@ -952,6 +954,10 @@ unroll_loop_runtime_iterations (struct loop *loop)
   /* Record the place where switch will be built for preconditioning.  */
   swtch = split_edge (loop_preheader_edge (loop));
 
+  int iter_freq, new_freq;
+  iter_freq = new_freq = swtch->frequency / (n_peel+1);
+  swtch->frequency = new_freq;
+
   for (i = 0; i < n_peel; i++)
     {
       /* Peel the copy.  */
@@ -958,12 +964,19 @@ unroll_loop_runtime_iterations (struct loop *loop)
       bitmap_clear (wont_exit);
       if (i != n_peel - 1 || !last_may_exit)
        bitmap_set_bit (wont_exit, 1);
+      int saved_header_frequency = loop->header->frequency;
+      zero_loop_frequencies (loop);
+
+      int new_header_freq = (saved_header_frequency / (n_peel + 1)) * (i + 1);
+      increment_loop_frequencies (loop, loop->header, new_header_freq);
       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                          1, wont_exit, desc->out_edge,
                                          &remove_edges,
                                          DLTHE_FLAG_UPDATE_FREQ);
+      zero_loop_frequencies (loop);
+      increment_loop_frequencies (loop, loop->header, saved_header_frequency);
       gcc_assert (ok);
-
+      
       /* Create item for switch.  */
       j = n_peel - i - (extra_zero_check ? 0 : 1);
       p = REG_BR_PROB_BASE / (i + 2);
@@ -979,11 +992,24 @@ unroll_loop_runtime_iterations (struct loop *loop)
 
       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
-      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
+      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
       e = make_edge (swtch, preheader,
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
       e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
       e->probability = p;
+
+      new_freq = new_freq + iter_freq;
+      swtch->frequency = new_freq;
+
+      int prehead_frequency = 0;
+      edge_iterator ei;
+      edge an_edge;
+      FOR_EACH_EDGE (an_edge, ei, preheader->preds)
+       {
+         int the_edge_frequency = EDGE_FREQUENCY (an_edge);
+         prehead_frequency += the_edge_frequency;
+       }
+      preheader->frequency = prehead_frequency;
     }
 
   if (extra_zero_check)
@@ -1015,14 +1041,44 @@ unroll_loop_runtime_iterations (struct loop *loop)
   bitmap_clear_bit (wont_exit, may_exit_copy);
   opt_info_start_duplication (opt_info);
 
+  {  
+    /* Recompute the loop body frequencies. */
+    zero_loop_frequencies (loop);
+    
+    basic_block my_header = loop->header;
+    int sum_incoming_frequencies = 0;
+
+    edge_iterator ei;
+    edge predecessor;
+    FOR_EACH_EDGE (predecessor, ei, my_header->preds)
+      {
+       if (!in_loop_p (predecessor->src, loop))
+         sum_incoming_frequencies += EDGE_FREQUENCY (predecessor);
+      }
+    /* Scale the incoming frequencies according to the heuristic that
+     * the loop frequency is the incoming edge frequency divided by
+     * 0.09.  This heuristic applies only to loops that iterate over a
+     * run-time value that is not known at compile time.  Note that
+     * 1/.09 equals 11.1111.  We'll use integer arithmetic on ten
+     * thousandths, and then divide by 10,000 after we've "rounded".
+     */
+
+    sum_incoming_frequencies *= 111111;  /* multiply by 11.1111 */
+    sum_incoming_frequencies += 5000;    /* round by adding 0.5 */
+    sum_incoming_frequencies /= 10000;  /* convert ten thousandths
+                                           to ones
+                                        */
+    
+    increment_loop_frequencies (loop, my_header, sum_incoming_frequencies);
+  }
+
   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
                                      max_unroll,
                                      wont_exit, desc->out_edge,
                                      &remove_edges,
-                                     DLTHE_FLAG_UPDATE_FREQ
-                                     | (opt_info
+                                     opt_info
                                         ? DLTHE_RECORD_COPY_NUMBER
-                                          : 0));
+                                          : 0);
   gcc_assert (ok);
 
   if (opt_info)
Index: cfgloopmanip.c
===================================================================
--- cfgloopmanip.c      (.../trunk/gcc) (revision 229257)
+++ cfgloopmanip.c      (.../branches/ibm/kelvin-1/gcc) (working copy)
@@ -34,6 +34,17 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-loop-manip.h"
 #include "dumpfile.h"
 
+/* Define ENABLE_CHECKING to enforce the following run-time checks.
+ *
+ *  a. The sum of outgoing edge frequencies for the loop equals the
+ *     sum of incoming edge frequencies for the loop header block.
+ *
+ *  b. The sum of predecessor edge frequencies for every block
+ *     in the loop equals the frequency of that block.
+ *
+ * This may report false-positive errors due to round-off errors.
+ */
+
 static void copy_loops_to (struct loop **, int,
                           struct loop *);
 static void loop_redirect_edge (edge, basic_block);
@@ -44,6 +55,543 @@ static void fix_loop_placements (struct loop *, bo
 static bool fix_bb_placement (basic_block);
 static void fix_bb_placements (basic_block, bool *, bitmap);
 
+/*
+ * Return true iff block is considered to reside within the loop
+ * represented by loop_ptr.
+ */
+bool
+in_loop_p (basic_block block, struct loop *loop_ptr)
+{
+  basic_block *bbs = get_loop_body (loop_ptr);
+  bool result = false;
+
+  for (unsigned int i = 0; i < loop_ptr->num_nodes; i++)
+    {
+      if (bbs[i] == block)
+       result = true;
+    }
+  free (bbs);
+  return result;
+}
+
+/*
+ * Zero all frequencies associated with this loop.
+ */
+void 
+zero_loop_frequencies (struct loop *loop_ptr)
+{
+  basic_block *bbs = get_loop_body (loop_ptr);
+  for (unsigned i = 0; i < loop_ptr->num_nodes; ++i)
+    {
+      bbs[i]->frequency = 0;
+    }
+  free (bbs);
+}
+
+/* A list of block_ladder_rung structs is used to keep track of all the
+ * blocks visited in a depth-first recursive traversal of a control-flow
+ * graph.  This list is used to detect and prevent attempts to revisit
+ * a block that is already being visited in the recursive traversal.
+ */
+typedef struct block_ladder_rung {
+  basic_block block;
+  struct block_ladder_rung *lower_rung;
+} *ladder_rung_p;
+
+/* Return true iff an_edge represents the same source and destination
+ * blocks as another_edge.
+ */
+static bool 
+same_edge_p (edge an_edge, edge another_edge)
+{
+  return ((an_edge->src == another_edge->src) 
+         && (an_edge->dest == another_edge->dest));
+}
+
+/* Return true iff an_edge matches one of the nodes that is already
+ * present within set_of_edges.
+ */
+static bool 
+in_edge_set_p (edge an_edge, vec<edge> set_of_edges)
+{
+  unsigned int j;
+  edge e;
+
+  FOR_EACH_VEC_ELT (set_of_edges, j, e)
+    {
+      if (same_edge_p (e, an_edge))
+       return true;
+    }
+  return false;
+}
+
+/* Return true iff an_edge->dest is already represented within
+ * the ladder_rung list.
+ */
+static bool 
+in_call_chain_p (edge an_edge, ladder_rung_p ladder_rung)
+{
+  while (ladder_rung != NULL)
+    {
+      if (an_edge->dest == ladder_rung->block)
+       return true;
+      else
+       ladder_rung = ladder_rung->lower_rung;
+    }
+  return FALSE;
+}
+
+/* This recursive function visits all of the blocks contained within the
+ * loop represented by loop_ptr and reachable from incoming_edge,
+ * and zeroes the frequency field of each block.  The recursion
+ * terminates if incoming_edge is known to exit this loop, or
+ * if the destination of incoming edge has already been visited
+ * in this recursive traversal, or if the destination of incoming_edge
+ * is the loop header.
+ */
+static void
+recursively_zero_frequency (struct loop *loop_ptr, vec<edge> exit_edges,
+                           ladder_rung_p ladder_rung,
+                           edge incoming_edge)
+{
+  if (incoming_edge->dest == loop_ptr->header)
+    return;
+  else if (in_edge_set_p (incoming_edge, exit_edges))
+    return;
+  else if (in_call_chain_p (incoming_edge, ladder_rung))
+    return;
+  else
+    {
+      struct block_ladder_rung a_rung;
+      basic_block block = incoming_edge->dest;
+      
+      a_rung.block = block;
+      a_rung.lower_rung = ladder_rung;
+      block->frequency = 0;
+      
+      edge_iterator ei;
+      edge successor;
+      FOR_EACH_EDGE (successor, ei, block->succs)
+       {
+         recursively_zero_frequency (loop_ptr, exit_edges,
+                                     &a_rung, successor);
+       }
+    }
+}
+                                    
+/* Return true iff the candidate block is found within the linked
+ * list represented by lower_steps, which would indicate that this
+ * block has already been visited by a recursive traversal.
+ */
+static bool 
+recursion_detected_p (basic_block candidate, ladder_rung_p lower_steps) {
+  while (lower_steps != NULL)
+    {
+      if (lower_steps->block == candidate)
+       return true;
+      lower_steps = lower_steps->lower_rung;
+    }
+  /* we iterated through the entire list and did not find candidate */
+  return false;
+}
+
+/* Return true iff candidate is contained within the loop represented
+ * by loop_header and loop_latch.
+ *
+ * We consider the block to be within the loop if there exists a path
+ * within the control flow graph from this node to the loop's latch
+ * which does not pass through the loop's header.  (If all paths to
+ * the latch pass through the loop header, then the node is contained
+ * within an outer-nested loop but not within this loop.)
+ *
+ * Note that if a candidate's successor is in the loop and the successor
+ * is not the loop header, then the candidate itself is also in the loop.
+ * If none of the successors of a candidate are in the loop, then the
+ * candidate itself is not in the loop.
+ */
+static bool 
+in_loop_of_header_p (basic_block candidate, basic_block loop_header,
+                     basic_block loop_latch, bool start_of_recursion,
+                     ladder_rung_p lower_steps)
+{
+  if (candidate == loop_latch)
+    return true;
+  else if (candidate == loop_header) 
+    return start_of_recursion;
+  else if (!start_of_recursion 
+          && recursion_detected_p (candidate, lower_steps))
+    {
+      /* if recursion revisits a node already visited and the loop latch
+       * was not visited in the call chain, then we are traversing an
+       * iterative path that belongs to an outer-nested loop.
+       */
+      return false;
+    }
+  else
+    {
+      struct block_ladder_rung new_step;
+      
+      new_step.block = candidate;
+      new_step.lower_rung = lower_steps;
+      
+      edge_iterator ei;
+      edge successor_edge;
+      FOR_EACH_EDGE (successor_edge, ei, candidate->succs)
+       {
+         basic_block successor = successor_edge->dest;
+         if (in_loop_of_header_p (successor, loop_header,
+                                  loop_latch, false, &new_step))
+           return true;
+       }
+      return false;              /* None of the successors was in loop  */
+    }
+}
+
+/* Add candidate into the results vector if candidate
+ * is in the loop and it is not already contained within the results
+ * vector. 
+ *
+ * We consider the block to be within the loop if there exists a path
+ * within the control flow graph from this node to the loop's latch
+ * which does not pass through the loop's header.  (If all paths to
+ * the latch pass through the loop header, then the node is contained
+ * within an outer-nested loop but not within this loop.)
+ *
+ * If and only if candidate is added to the results vector, recursively
+ * do the same for each successor of candidate block.
+ *
+ * Return the potentially modified results vector.
+ */
+static vec<basic_block> 
+recursively_get_loop_blocks (basic_block candidate, vec<basic_block> results,
+                            basic_block loop_header, basic_block loop_latch)
+{
+  basic_block bb;
+  unsigned int u;
+
+  /* if candidate is already in the results vector, then we're done */
+  FOR_EACH_VEC_ELT (results, u, bb)
+    {
+      if (bb == candidate)
+       return results;
+    }
+
+  if (in_loop_of_header_p (candidate, loop_header, loop_latch, true, NULL))
+    {
+      results.safe_push (candidate);
+
+      edge_iterator ei;
+      edge successor;
+      FOR_EACH_EDGE (successor, ei, candidate->succs)
+       {
+         if (successor->probability != 0)
+           {
+             results = recursively_get_loop_blocks (successor->dest, results, 
+                                                    loop_header, loop_latch);
+           }
+       }
+    }
+  return results;
+}
+
+/* Return a vector containing all of the blocks contained within the
+ * loop identified by loop_ptr.
+ */
+static vec<basic_block> 
+get_loop_blocks (struct loop *loop_ptr)
+{
+  vec<basic_block> results;
+
+  results = vNULL;
+  results = recursively_get_loop_blocks (loop_ptr->header, results,
+                                        loop_ptr->header, loop_ptr->latch);
+  return results;
+}
+
+/* Return true iff block is an element of the block_set vector.
+ */
+static bool 
+in_block_set_p (basic_block block, vec<basic_block> block_set)
+{
+  basic_block bb;
+  unsigned int u;
+  FOR_EACH_VEC_ELT (block_set, u, bb)
+    {
+      if (bb == block)
+       return true;
+    }
+  return false;
+}
+
+/* Return a vector containing all of the edges that exit the loop
+ * represented by the loop_blocks vector.
+ */
+static vec<edge> 
+get_exit_edges_from_loop_blocks (vec<basic_block> loop_blocks) {
+  basic_block bb;
+  unsigned int u;
+  vec<edge> results = vNULL;
+
+  FOR_EACH_VEC_ELT (loop_blocks, u, bb)
+    {
+      edge_iterator ei;
+      edge successor;
+      FOR_EACH_EDGE (successor, ei, bb->succs)
+       {
+         basic_block edge_dest = successor->dest;
+         
+         if (!in_block_set_p (edge_dest, loop_blocks))
+           {
+             results.safe_push (successor);
+           }
+       }
+    }
+  return results;
+}
+
+/*
+ * Zero all frequencies for all blocks contained within the loop
+ * represented by loop_ptr which are reachable from block without
+ * passing through the block header.  If block is not within the loop,
+ * this has no effect. The behavior is as outlined by the following
+ * algorithm:
+ *
+ * If block is contained within loop:
+ *   Set block's frequency to zero
+ *   Using a depth-first traversal, do the same for each successor
+ *   transitively, stopping the recursive traversal if:
+ *     the current block is the loop header, or
+ *     the current block resides outside the loop, or
+ *     the current block has already been visited in this depth-first
+ *       traversal. 
+ */
+static void
+zero_partial_loop_frequencies (struct loop *loop_ptr, basic_block block)
+{
+  /* When zero_partial_loop_frequencies is invoked, the *loop_ptr
+   * object is not entirely coherent, so the library service
+   * get_loop_exit_edges (loop_p) cannot be called from this context.
+   * Instead, we use get_loop_blocks (loop_p) and
+   * get_exit_edges_from_loop_blocks (vec<basic_block>) functions
+   * which assume only the validity of loop_ptr->loop_header,
+   * loop_ptr->loop_latch, and valid successor and predecessor
+   * information for each block contained within the loop.
+   */
+  vec<basic_block> loop_blocks = get_loop_blocks (loop_ptr);
+  if (in_block_set_p (block, loop_blocks))
+    {
+      struct block_ladder_rung ladder_rung;
+      ladder_rung.block = block;
+      ladder_rung.lower_rung = NULL;
+      
+      vec<edge> exit_edges = get_exit_edges_from_loop_blocks (loop_blocks);
+      block->frequency = 0;
+      
+      edge_iterator ei;
+      edge successor;
+      FOR_EACH_EDGE (successor, ei, block->succs)
+       {
+         if (successor->probability != 0)
+           {
+             recursively_zero_frequency (loop_ptr, exit_edges,
+                                         &ladder_rung, successor);
+           }
+       }
+      exit_edges.release ();
+    }
+  loop_blocks.release ();
+}
+
+/* This recursive function visits all of the blocks contained within the
+ * loop represented by loop_ptr and reachable from incoming_edge,
+ * and increments the frequency field of each block by an
+ * appropriate scaling of frequency_increment.  The
+ * frequency_increment value is scaled in recursive invocations of
+ * this function by the probability associated with the edge
+ * corresponding to the particular recursive call.  The recursion
+ * terminates if incoming_edge is known to exit this loop, or
+ * if the destination of incoming edge has already been visited
+ * in this recursive traversal, or if the destination of incoming_edge
+ * is the loop header.
+ */
+static void
+recursively_increment_frequency (struct loop *loop_ptr, vec<edge> exit_edges,
+                                ladder_rung_p ladder_rung,
+                                edge incoming_edge,
+                                int frequency_increment)
+{
+  if (incoming_edge->dest == loop_ptr->header)
+    return;
+  else if (in_edge_set_p (incoming_edge, exit_edges))
+    return;
+  else if (in_call_chain_p (incoming_edge, ladder_rung))
+    return;
+  else
+    {
+      struct block_ladder_rung a_rung;
+      basic_block block = incoming_edge->dest;
+      
+      a_rung.block = block;
+      a_rung.lower_rung = ladder_rung;
+      block->frequency += frequency_increment;
+
+      edge_iterator ei;
+      edge successor;
+      FOR_EACH_EDGE (successor, ei, block->succs)
+       {
+         int successor_increment =
+           (frequency_increment * successor->probability) / REG_BR_PROB_BASE;
+         recursively_increment_frequency (loop_ptr, exit_edges,
+                                          &a_rung, successor,
+                                          successor_increment);
+       }
+    }
+}
+ 
+/*
+ * If block is contained within loop, we do the following:
+ *   Add incremental_frequency (which may be negative) to
+ *   block->frequency and propogate this change to all successors of
+ *   block that reside within the loop, transitively.  Use a depth-first
+ *   tree traversal, stopping the recursion at the loop header, at any
+ *   successor block that resides outside the loop, and at any block
+ *   that is already part of the current depth-first traversal.
+ */
+void 
+increment_loop_frequencies (struct loop *loop_ptr, basic_block block,
+                           int frequency_increment)
+{
+  vec<basic_block> loop_blocks = get_loop_blocks (loop_ptr);
+
+  if (in_block_set_p (block, loop_blocks))
+    {
+      struct block_ladder_rung ladder_rung;
+      ladder_rung.block = block;
+      ladder_rung.lower_rung = NULL;
+      
+      vec<edge> exit_edges = get_exit_edges_from_loop_blocks (loop_blocks);
+      block->frequency += frequency_increment;
+      
+      edge_iterator ei;
+      edge successor;
+      FOR_EACH_EDGE (successor, ei, block->succs)
+       {
+         if (successor->probability != 0)
+           {
+             int successor_increment =
+               ((frequency_increment * successor->probability)
+                / REG_BR_PROB_BASE);
+             recursively_increment_frequency (loop_ptr, exit_edges,
+                                              &ladder_rung, successor,
+                                              successor_increment);
+           }
+       }
+      exit_edges.release ();
+    }
+  loop_blocks.release ();
+}
+
+#ifdef ENABLE_CHECKING
+/**
+ * Issue a fatal error message and abort program execution.
+ */
+static void 
+internal (const char *msg)
+{
+  fprintf (stderr, "Fatal internal error: %s\n", msg);
+  exit (-1);
+}
+
+/*
+ * check_loop_frequency_integrity enforces that:
+ *
+ *  a. The sum of outgoing edge frequencies for the loop equals the
+ *     sum of incoming edge frequencies for the loop header block.
+ *
+ *  b. The sum of predecessor edge frequencies for every block
+ *     in the loop equals the frequency of that block.
+ *
+ * The integrity check is problematic due to round-off errors. Though
+ * it hasn't been tested with max-unroll-times greater than 4, we
+ * suspect that unrolling complex control structures contained within a
+ * loop that is unrolled more than 4 times may result in erroneous
+ * integrity check failures due to round-off errors.
+ */
+static void 
+check_loop_frequency_integrity (struct loop *loop_ptr)
+{
+  unsigned int i, k;
+  basic_block a_block;
+
+  vec<basic_block> loop_body = get_loop_blocks (loop_ptr);
+  basic_block header;
+
+  FOR_EACH_VEC_ELT (loop_body, k, a_block)
+    {
+      int delta;
+      int predecessor_frequencies = 0;
+
+      edge_iterator ei;
+      edge a_predecessor;
+      FOR_EACH_EDGE (a_predecessor, ei, a_block->preds)
+       {
+         predecessor_frequencies += EDGE_FREQUENCY (a_predecessor);
+       }
+      delta = predecessor_frequencies - a_block->frequency;
+
+      /* Enforce tolerance to within 0.2%. */
+      int tolerance = predecessor_frequencies / 500;  
+      if (tolerance < 10)
+       tolerance = 10;
+
+      if (delta < 0)
+       delta = -delta;
+      
+      if (delta > tolerance)
+       {
+         internal ("Predecessor frequencies confused while unrolling loop.");
+       }
+    }
+
+  header = loop_ptr->header;
+  int incoming_frequency = 0;
+
+  edge_iterator ei;
+  edge a_predecessor;
+  FOR_EACH_EDGE (a_predecessor, ei, header->preds)
+    {
+      if (!in_block_set_p (a_predecessor->src, loop_body))
+       {
+         incoming_frequency += EDGE_FREQUENCY (a_predecessor);
+       }
+    }
+
+  int outgoing_frequency = 0;
+  vec<edge> exit_edges = get_exit_edges_from_loop_blocks (loop_body);
+  edge edge;
+  FOR_EACH_VEC_ELT (exit_edges, i, edge)
+    {
+      outgoing_frequency += EDGE_FREQUENCY (edge);
+    }
+
+  /* enforce tolerance to within 0.2% */
+  int tolerance = incoming_frequency / 500;
+  if (tolerance < 10)
+    tolerance = 10;
+  int delta = incoming_frequency - outgoing_frequency;
+
+  if (delta < 0)
+    delta = -delta;
+  
+  if (delta > tolerance)
+    {
+      internal ("Incoherent frequencies while unrolling loop.");
+    }
+  loop_body.release ();
+  exit_edges.release ();
+}
+#endif /* ENABLE_CHECKING */
+
 /* Checks whether basic block BB is dominated by DATA.  */
 static bool
 rpe_enum_p (const_basic_block bb, const void *data)
@@ -1101,7 +1649,7 @@ can_duplicate_loop_p (const struct loop *loop)
    is redistributed evenly to the remaining edges coming from E->src.  */
 
 static void
-set_zero_probability (edge e)
+set_zero_probability (struct loop *loop_ptr, edge e)
 {
   basic_block bb = e->src;
   edge_iterator ei;
@@ -1109,6 +1657,10 @@ static void
   unsigned n = EDGE_COUNT (bb->succs);
   gcov_type cnt = e->count, cnt1;
   unsigned prob = e->probability, prob1;
+  int original_edge_frequency;
+  int new_edge_frequency;
+  int change_in_edge_frequency;
+  bool edge_originates_in_loop = in_loop_p (bb, loop_ptr);
 
   gcc_assert (n > 1);
   cnt1 = cnt / (n - 1);
@@ -1118,18 +1670,63 @@ static void
     {
       if (ae == e)
        continue;
-
-      ae->probability += prob1;
-      ae->count += cnt1;
+      
+      if (edge_originates_in_loop)
+       {
+         original_edge_frequency = EDGE_FREQUENCY (ae);
+         ae->probability += prob1;
+         ae->count += cnt1;
+         new_edge_frequency = EDGE_FREQUENCY (ae);
+         change_in_edge_frequency =
+           new_edge_frequency - original_edge_frequency;
+         
+         increment_loop_frequencies (loop_ptr, ae->dest,
+                                     change_in_edge_frequency);
+       }
+      else
+       {
+         ae->probability += prob1;
+         ae->count += cnt1;
+       }
       last = ae;
     }
-
+    
   /* Move the rest to one of the edges.  */
-  last->probability += prob % (n - 1);
-  last->count += cnt % (n - 1);
+  if (edge_originates_in_loop)
+    {
+      original_edge_frequency = EDGE_FREQUENCY (last);
+      last->probability += prob % (n - 1);
+      last->count += cnt % (n - 1);
+      new_edge_frequency = EDGE_FREQUENCY (last);
+      change_in_edge_frequency = new_edge_frequency - original_edge_frequency;
+      if (change_in_edge_frequency != 0)
+       {
+         increment_loop_frequencies (loop_ptr, last->dest,
+                                     change_in_edge_frequency);
+       }
+    }
+  else
+    {
+      last->probability += prob % (n - 1);
+      last->count += cnt % (n - 1);
+    }
 
-  e->probability = 0;
-  e->count = 0;
+  if (edge_originates_in_loop)
+    {
+      original_edge_frequency = EDGE_FREQUENCY (e);
+      e->probability = 0;
+      e->count = 0;
+      new_edge_frequency = EDGE_FREQUENCY (e);
+      change_in_edge_frequency =
+       new_edge_frequency - original_edge_frequency;
+      increment_loop_frequencies (loop_ptr, e->dest,
+                                 change_in_edge_frequency);
+    }
+  else
+    {
+      e->probability = 0;
+      e->count = 0;
+    }
 }
 
 /* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of updating
@@ -1170,6 +1767,41 @@ duplicate_loop_to_header_edge (struct loop *loop,
   bitmap bbs_to_scale = NULL;
   bitmap_iterator bi;
 
+  /* Remember the initial ratio between frequency
+   * of edge into loop header and the frequency of the loop header.
+   * Preserve this ratio when we make adjustments within the loop.
+   * This distinction is necessary because different flavors of loops
+   * are subject to different heuristics.  In particular, loops
+   * bounded by run-time constants assume that branches exiting a loop
+   * have probability 9%.  Loops bounded by compile-time constants 
+   * assume branches exiting a loop have probability 1%. There may be
+   * other circumstances that assume different behaviors.
+   *
+   * KELVIN_TODO:
+   * For loops that have a single exit, the exit ratio is the same as
+   * the ratio between the sum of the frequency of the header's
+   * incoming edges and the frequency of the header itself.  For loops
+   * that have multiple exits, we still need to investigate.  
+   */
+  int header_frequency = header->frequency;
+  int preheader_frequency = 0;
+  
+  /* Sum the EDGE frequencies for all predecessor edges that
+   * originate outside the loop.
+   */
+
+  edge_iterator ei;
+  edge predecessor;
+  FOR_EACH_EDGE (predecessor, ei, header->preds)
+    {
+      if (!in_loop_p (predecessor->src, loop))
+       {
+         preheader_frequency += EDGE_FREQUENCY (predecessor);
+       }
+    }
+
+  int exit_ratio = (header_frequency * 10000 - 5000) / preheader_frequency;
+
   gcc_assert (e->dest == loop->header);
   gcc_assert (ndupl > 0);
 
@@ -1236,7 +1868,7 @@ duplicate_loop_to_header_edge (struct loop *loop,
        }
 
       scale_step = XNEWVEC (int, ndupl);
-
+      
       for (i = 1; i <= ndupl; i++)
        scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
                                ? prob_pass_wont_exit
@@ -1293,7 +1925,7 @@ duplicate_loop_to_header_edge (struct loop *loop,
 
   /* Loop the new bbs will belong to.  */
   target = e->src->loop_father;
-
+  
   /* Original loops.  */
   n_orig_loops = 0;
   for (aloop = loop->inner; aloop; aloop = aloop->next)
@@ -1301,7 +1933,7 @@ duplicate_loop_to_header_edge (struct loop *loop,
   orig_loops = XNEWVEC (struct loop *, n_orig_loops);
   for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
     orig_loops[i] = aloop;
-
+  
   set_loop_copy (loop, target);
 
   first_active = XNEWVEC (basic_block, n);
@@ -1310,10 +1942,30 @@ duplicate_loop_to_header_edge (struct loop *loop,
       memcpy (first_active, bbs, n * sizeof (basic_block));
       first_active_latch = latch;
     }
-
+  
   spec_edges[SE_ORIG] = orig;
   spec_edges[SE_LATCH] = latch_edge;
+  
+  /* Recompute the loop body frequencies. */
+  zero_loop_frequencies (loop);
 
+  basic_block my_header = loop->header;
+  int sum_incoming_frequencies = 0;
+
+  /* ei and predecessor declared above */
+  FOR_EACH_EDGE(predecessor, ei, my_header->preds)
+    {
+      /* exit_ratio is computed based on remembered circumstances upon
+       * entry into this function.  Note that loops bounded by a compile-time
+       * constant have different exit ratio than loops bounded by a run-time
+       * value.
+       */
+      if (!in_loop_p (predecessor->src, loop))
+       sum_incoming_frequencies +=
+         (int) (EDGE_FREQUENCY (predecessor) * exit_ratio + 5000) / 10000;
+    }
+  increment_loop_frequencies (loop, my_header, sum_incoming_frequencies);
+
   place_after = e->src;
   for (j = 0; j < ndupl; j++)
     {
@@ -1322,7 +1974,10 @@ duplicate_loop_to_header_edge (struct loop *loop,
 
       /* Copy bbs.  */
       copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
-               place_after, true);
+                place_after, true);
+
+      int place_after_frequency = place_after->frequency;
+      basic_block saved_place_after = place_after;
       place_after = new_spec_edges[SE_LATCH]->src;
 
       if (flags & DLTHE_RECORD_COPY_NUMBER)
@@ -1352,8 +2007,7 @@ duplicate_loop_to_header_edge (struct loop *loop,
            }
          for (i = 0; i < n; i++)
            new_bbs[i]->flags &= ~BB_DUPLICATED;
-       }
-
+        }
       /* Redirect the special edges.  */
       if (is_latch)
        {
@@ -1373,13 +2027,18 @@ duplicate_loop_to_header_edge (struct loop *loop,
          e = new_spec_edges[SE_LATCH];
        }
 
+      zero_partial_loop_frequencies (loop, saved_place_after);
+      increment_loop_frequencies (loop,
+                                 saved_place_after, place_after_frequency);
+
       /* Record exit edge in this copy.  */
       if (orig && bitmap_bit_p (wont_exit, j + 1))
        {
          if (to_remove)
-           to_remove->safe_push (new_spec_edges[SE_ORIG]);
-         set_zero_probability (new_spec_edges[SE_ORIG]);
-
+           {
+             to_remove->safe_push (new_spec_edges[SE_ORIG]);
+           }
+         set_zero_probability (loop, new_spec_edges[SE_ORIG]);
          /* Scale the frequencies of the blocks dominated by the exit.  */
          if (bbs_to_scale)
            {
@@ -1413,8 +2072,10 @@ duplicate_loop_to_header_edge (struct loop *loop,
   if (orig && bitmap_bit_p (wont_exit, 0))
     {
       if (to_remove)
-       to_remove->safe_push (orig);
-      set_zero_probability (orig);
+       {
+         to_remove->safe_push (orig);
+       }
+      set_zero_probability (loop, orig);
 
       /* Scale the frequencies of the blocks dominated by the exit.  */
       if (bbs_to_scale)
@@ -1462,6 +2123,13 @@ duplicate_loop_to_header_edge (struct loop *loop,
   free (bbs);
   BITMAP_FREE (bbs_to_scale);
 
+#ifdef ENABLE_CHECKING
+  /* This function call is strictly paranoia.  it makes no changes
+   * to the data structures.
+   */
+  check_loop_frequency_integrity (loop);
+#endif
+
   return true;
 }
 
Index: cfgloopmanip.h
===================================================================
--- cfgloopmanip.h      (.../trunk/gcc) (revision 229257)
+++ cfgloopmanip.h      (.../branches/ibm/kelvin-1/gcc) (working copy)
@@ -60,4 +60,8 @@ extern void force_single_succ_latches (void);
 struct loop * loop_version (struct loop *, void *,
                            basic_block *, unsigned, unsigned, unsigned, bool);
 
+extern bool in_loop_p (basic_block, struct loop *);
+extern void zero_loop_frequencies (struct loop *);
+extern void increment_loop_frequencies (struct loop *, basic_block, int);
+
 #endif /* GCC_CFGLOOPMANIP_H */

Reply via email to