This patch pessimizes stack accounting during inlining.  This enables
setting a firm stack size limit (via parameters "large-stack-frame"
and "large-stack-frame-growth").  Without this patch the inliner is
overly optimistic about potential stack reuse resulting in actual
stack frames much larger than the parameterized limits.

Internal benchmarks show minor performance differences with non-fdo
and lipo, but overall neutral.  Tested/bootstrapped on x86-64.

Ok for google-main?

Mark

2011-06-07  Mark Heffernan  <meh...@google.com>
        * cgraph.h (cgraph_global_info): Remove field.
        * ipa-inline.c (cgraph_clone_inlined_nodes): Change
        stack frame computation.
        (cgraph_check_inline_limits): Ditto.
        (compute_inline_parameters): Remove dead initialization.

Index: gcc/cgraph.h
===================================================================
--- gcc/cgraph.h        (revision 174512)
+++ gcc/cgraph.h        (working copy)
@@ -136,8 +136,6 @@ struct GTY(()) cgraph_local_info {
 struct GTY(()) cgraph_global_info {
   /* Estimated stack frame consumption by the function.  */
   HOST_WIDE_INT estimated_stack_size;
-  /* Expected offset of the stack frame of inlined function.  */
-  HOST_WIDE_INT stack_frame_offset;

   /* For inline clones this points to the function they will be
      inlined into.  */
Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c    (revision 174512)
+++ gcc/ipa-inline.c    (working copy)
@@ -229,8 +229,6 @@ void
 cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
                            bool update_original)
 {
-  HOST_WIDE_INT peak;
-
   if (duplicate)
     {
       /* We may eliminate the need for out-of-line copy to be output.
@@ -279,13 +277,13 @@ cgraph_clone_inlined_nodes (struct cgrap
     e->callee->global.inlined_to = e->caller->global.inlined_to;
   else
     e->callee->global.inlined_to = e->caller;
-  e->callee->global.stack_frame_offset
-    = e->caller->global.stack_frame_offset
-      + inline_summary (e->caller)->estimated_self_stack_size;
-  peak = e->callee->global.stack_frame_offset
-      + inline_summary (e->callee)->estimated_self_stack_size;
-  if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
-    e->callee->global.inlined_to->global.estimated_stack_size = peak;
+
+  /* Pessimistically assume no sharing of stack space.  That is, the
+     frame size of a function is estimated as the original frame size
+     plus the sum of the frame sizes of all inlined callees.  */
+  e->callee->global.inlined_to->global.estimated_stack_size +=
+    inline_summary (e->callee)->estimated_self_stack_size;
+
   cgraph_propagate_frequency (e->callee);

   /* Recursively clone all bodies.  */
@@ -430,8 +428,7 @@ cgraph_check_inline_limits (struct cgrap

   stack_size_limit += stack_size_limit * PARAM_VALUE
(PARAM_STACK_FRAME_GROWTH) / 100;

-  inlined_stack = (to->global.stack_frame_offset
-                  + inline_summary (to)->estimated_self_stack_size
+  inlined_stack = (to->global.estimated_stack_size
                   + what->global.estimated_stack_size);
   if (inlined_stack  > stack_size_limit
       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
@@ -2064,7 +2061,6 @@ compute_inline_parameters (struct cgraph
   self_stack_size = optimize ? estimated_stack_frame_size (node) : 0;
   inline_summary (node)->estimated_self_stack_size = self_stack_size;
   node->global.estimated_stack_size = self_stack_size;
-  node->global.stack_frame_offset = 0;

   /* Can this function be inlined at all?  */
   node->local.inlinable = tree_inlinable_function_p (node->decl);

Reply via email to