This patch makes stack accounting during inlining more pessimistic, which makes it possible to set a firm stack size limit (via the parameters "large-stack-frame" and "large-stack-frame-growth"). Without this patch, the inliner is overly optimistic about potential stack reuse, resulting in actual stack frames much larger than the parameterized limits.
Internal benchmarks show minor performance differences with non-fdo and lipo, but are neutral overall. Tested/bootstrapped on x86-64. Ok for google-main?

Mark

2011-06-07  Mark Heffernan  <meh...@google.com>

        * cgraph.h (cgraph_global_info): Remove field.
        * ipa-inline.c (cgraph_clone_inlined_nodes): Change
        stack frame computation.
        (cgraph_check_inline_limits): Ditto.
        (compute_inline_parameters): Remove dead initialization.

Index: gcc/cgraph.h
===================================================================
--- gcc/cgraph.h (revision 174512)
+++ gcc/cgraph.h (working copy)
@@ -136,8 +136,6 @@ struct GTY(()) cgraph_local_info {
 struct GTY(()) cgraph_global_info {
   /* Estimated stack frame consumption by the function. */
   HOST_WIDE_INT estimated_stack_size;
-  /* Expected offset of the stack frame of inlined function. */
-  HOST_WIDE_INT stack_frame_offset;
 
   /* For inline clones this points to the function they will be
      inlined into. */
Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c (revision 174512)
+++ gcc/ipa-inline.c (working copy)
@@ -229,8 +229,6 @@ void
 cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
                             bool update_original)
 {
-  HOST_WIDE_INT peak;
-
   if (duplicate)
     {
       /* We may eliminate the need for out-of-line copy to be output.
@@ -279,13 +277,13 @@ cgraph_clone_inlined_nodes (struct cgrap
     e->callee->global.inlined_to = e->caller->global.inlined_to;
   else
     e->callee->global.inlined_to = e->caller;
-  e->callee->global.stack_frame_offset
-    = e->caller->global.stack_frame_offset
-      + inline_summary (e->caller)->estimated_self_stack_size;
-  peak = e->callee->global.stack_frame_offset
-         + inline_summary (e->callee)->estimated_self_stack_size;
-  if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
-    e->callee->global.inlined_to->global.estimated_stack_size = peak;
+
+  /* Pessimistically assume no sharing of stack space. That is, the
+     frame size of a function is estimated as the original frame size
+     plus the sum of the frame sizes of all inlined callees. */
+  e->callee->global.inlined_to->global.estimated_stack_size +=
+    inline_summary (e->callee)->estimated_self_stack_size;
+
   cgraph_propagate_frequency (e->callee);
 
   /* Recursively clone all bodies. */
@@ -430,8 +428,7 @@ cgraph_check_inline_limits (struct cgrap
 
   stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;
 
-  inlined_stack = (to->global.stack_frame_offset
-                   + inline_summary (to)->estimated_self_stack_size
+  inlined_stack = (to->global.estimated_stack_size
                    + what->global.estimated_stack_size);
   if (inlined_stack > stack_size_limit
       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
@@ -2064,7 +2061,6 @@ compute_inline_parameters (struct cgraph
   self_stack_size = optimize ? estimated_stack_frame_size (node) : 0;
   inline_summary (node)->estimated_self_stack_size = self_stack_size;
   node->global.estimated_stack_size = self_stack_size;
-  node->global.stack_frame_offset = 0;
 
   /* Can this function be inlined at all? */
   node->local.inlinable = tree_inlinable_function_p (node->decl);