Hi,
this patch makes profile scaling use the same logic in tree-inline and
cgraphclones.
This reduces roundoff errors and makes it easier to propagate more info.
When compiling tramp3d with profile feedback, the number of mismatches after
inlining goes down from 248 to 95.

Honza

        * cgraph.h (cgraph_edge::clone): Update prototype.
        * cgraphclones.c (cgraph_edge::clone): Update profile scaling.
        (cgraph_node::create_clone): Update.
        (cgraph_node::create_version_clone): Update.
        * tree-inline.c (copy_bb): Update.
        (expand_call_inline): Update.
Index: cgraph.h
===================================================================
--- cgraph.h    (revision 249092)
+++ cgraph.h    (working copy)
@@ -1649,7 +1649,7 @@ struct GTY((chain_next ("%h.next_caller"
   /* Create clone of edge in the node N represented
      by CALL_EXPR the callgraph.  */
   cgraph_edge * clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid,
-                      gcov_type count_scale, int freq_scale,
+                      profile_count num, profile_count den, int freq_scale,
                       bool update_original);
 
   /* Verify edge count and frequency.  */
Index: cgraphclones.c
===================================================================
--- cgraphclones.c      (revision 249092)
+++ cgraphclones.c      (working copy)
@@ -86,10 +86,13 @@ along with GCC; see the file COPYING3.
 
 cgraph_edge *
 cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid,
-                   gcov_type count_scale, int freq_scale, bool update_original)
+                   profile_count num, profile_count den,
+                   int freq_scale, bool update_original)
 {
   cgraph_edge *new_edge;
-  profile_count gcov_count = count.apply_scale (count_scale, REG_BR_PROB_BASE);
+  profile_count gcov_count
+        = (num == profile_count::zero () || den > 0)
+          ? count.apply_scale (num, den) : count;
   gcov_type freq;
 
   /* We do not want to ignore loop nest after frequency drops to 0.  */
@@ -116,7 +119,7 @@ cgraph_edge::clone (cgraph_node *n, gcal
        {
          new_edge = n->create_indirect_edge (call_stmt,
                                              indirect_info->ecf_flags,
-                                             count, freq, false);
+                                             gcov_count, freq, false);
          *new_edge->indirect_info = *indirect_info;
        }
     }
@@ -428,7 +431,6 @@ cgraph_node::create_clone (tree new_decl
 {
   cgraph_node *new_node = symtab->create_empty ();
   cgraph_edge *e;
-  gcov_type count_scale;
   unsigned i;
 
   if (new_inlined_to)
@@ -453,7 +455,6 @@ cgraph_node::create_clone (tree new_decl
   new_node->global = global;
   new_node->global.inlined_to = new_inlined_to;
   new_node->rtl = rtl;
-  new_node->count = count;
   new_node->frequency = frequency;
   new_node->tp_first_run = tp_first_run;
   new_node->tm_clone = tm_clone;
@@ -475,18 +476,6 @@ cgraph_node::create_clone (tree new_decl
   else
     new_node->clone.combined_args_to_skip = args_to_skip;
 
-  if (count.initialized_p ())
-    {
-      if (new_node->count > count)
-        count_scale = REG_BR_PROB_BASE;
-      else
-       count_scale = new_node->count.probability_in (count);
-    }
-  else
-    count_scale = 0;
-  if (update_original)
-    count -= prof_count;
-
   FOR_EACH_VEC_ELT (redirect_callers, i, e)
     {
       /* Redirect calls to the old version node to point to its new
@@ -500,12 +489,12 @@ cgraph_node::create_clone (tree new_decl
   new_node->expand_all_artificial_thunks ();
 
   for (e = callees;e; e=e->next_callee)
-    e->clone (new_node, e->call_stmt, e->lto_stmt_uid, count_scale,
+    e->clone (new_node, e->call_stmt, e->lto_stmt_uid, new_node->count, count,
              freq, update_original);
 
   for (e = indirect_calls; e; e = e->next_callee)
     e->clone (new_node, e->call_stmt, e->lto_stmt_uid,
-             count_scale, freq, update_original);
+             new_node->count, count, freq, update_original);
   new_node->clone_references (this);
 
   new_node->next_sibling_clone = clones;
@@ -514,6 +503,9 @@ cgraph_node::create_clone (tree new_decl
   clones = new_node;
   new_node->clone_of = this;
 
+  if (update_original)
+    count -= prof_count;
+
   if (call_duplication_hook)
     symtab->call_cgraph_duplication_hooks (this, new_node);
 
@@ -911,14 +903,14 @@ cgraph_node::create_version_clone (tree
      if (!bbs_to_copy
         || bitmap_bit_p (bbs_to_copy, gimple_bb (e->call_stmt)->index))
        e->clone (new_version, e->call_stmt,
-                e->lto_stmt_uid, REG_BR_PROB_BASE,
+                e->lto_stmt_uid, count, count,
                 CGRAPH_FREQ_BASE,
                 true);
    for (e = indirect_calls; e; e=e->next_callee)
      if (!bbs_to_copy
         || bitmap_bit_p (bbs_to_copy, gimple_bb (e->call_stmt)->index))
        e->clone (new_version, e->call_stmt,
-                e->lto_stmt_uid, REG_BR_PROB_BASE,
+                e->lto_stmt_uid, count, count,
                 CGRAPH_FREQ_BASE,
                 true);
    FOR_EACH_VEC_ELT (redirect_callers, i, e)
Index: tree-inline.c
===================================================================
--- tree-inline.c       (revision 249092)
+++ tree-inline.c       (working copy)
@@ -2009,7 +2009,9 @@ copy_bb (copy_body_data *id, basic_block
                      struct cgraph_edge *old_edge = edge;
                      edge = edge->clone (id->dst_node, call_stmt,
                                          gimple_uid (stmt),
-                                         REG_BR_PROB_BASE, CGRAPH_FREQ_BASE,
+                                         profile_count::one (),
+                                         profile_count::one (),
+                                         CGRAPH_FREQ_BASE,
                                          true);
                      /* We could also just rescale the frequency, but
                         doing so would introduce roundoff errors and make
@@ -2028,7 +2030,9 @@ copy_bb (copy_body_data *id, basic_block
                          old_edge->speculative_call_info (direct, indirect, ref);
                          indirect = indirect->clone (id->dst_node, call_stmt,
                                                      gimple_uid (stmt),
-                                                     REG_BR_PROB_BASE, CGRAPH_FREQ_BASE,
+                                                     profile_count::one (),
+                                                     profile_count::one (),
+                                                     CGRAPH_FREQ_BASE,
                                                      true);
                          if (old_edge->frequency + indirect->frequency)
                            {
@@ -4509,7 +4522,9 @@ expand_call_inline (basic_block bb, gimp
       cg_edge->remove ();
       edge = id->src_node->callees->clone (id->dst_node, call_stmt,
                                           gimple_uid (stmt),
-                                          REG_BR_PROB_BASE, CGRAPH_FREQ_BASE,
+                                          profile_count::one (),
+                                          profile_count::one (),
+                                          CGRAPH_FREQ_BASE,
                                           true);
       edge->frequency = freq;
       edge->count = count;

Reply via email to