Hi,
we have a test in the gnat.dg testsuite (stack_usage1.adb) which checks that
the allocation of big temporaries created in non-overlapping blocks on the
stack is optimal, i.e. that they share a stack slot. It is run at -O0 and
passes. If you run it at -O2, it also passes. Now, if you run it at -O1, it
fails and that's a regression from the pre-TREE_CLOBBER_P era.
The problem is that, when optimization is enabled, DECL_IGNORED_P variables
are removed from blocks by remove_unused_scope_block_p and moved to the
toplevel. Now defer_stack_allocation has:
/* Variables in the outermost scope automatically conflict with
every other variable. The only reason to want to defer them
at all is that, after sorting, we can more efficiently pack
small variables in the stack frame. Continue to defer at -O2. */
if (toplevel && optimize < 2)
return false;
The comment is slightly obsolete in the TREE_CLOBBER_P era, since toplevel
variables don't necessarily conflict with each other, for example the above
variables moved to toplevel by remove_unused_scope_block_p.
We don't think that we need to tweak again remove_unused_scope_block_p in the
TREE_CLOBBER_P era; instead we can defer the allocation of big DECL_IGNORED_P
variables at toplevel from defer_stack_allocation.
Tested on x86_64-suse-linux, OK for the mainline?
2013-11-14 Olivier Hainque <hain...@adacore.com>
* cfgexpand.c (defer_stack_allocation): When optimization is enabled,
defer allocation of DECL_IGNORED_P variables at toplevel unless really
small. Factorize size threshold computation from the existing one.
(expand_used_vars): Refine comment.
2013-11-14 Eric Botcazou <ebotca...@adacore.com>
* gnat.dg/stack_usage1b.adb: New test.
* gnat.dg/stack_usage1c.adb: Likewise.
--
Eric Botcazou
-- { dg-do compile }
-- { dg-options "-O -fstack-usage" }
with Stack_Usage1_Pkg; use Stack_Usage1_Pkg;
procedure Stack_Usage1b is
A : Integer := Ident_Int (123);
begin
case A is
when 0 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 1 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 2 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 3 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 4 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 5 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 6 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 7 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 8 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 9 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when others =>
null;
end case;
end Stack_Usage1b;
-- { dg-final { scan-stack-usage "\t\[0-9\]\[0-9\]\t" { target i?86-*-* x86_64-*-* } } }
-- { dg-final { cleanup-stack-usage } }
-- { dg-do compile }
-- { dg-options "-O2 -fstack-usage" }
with Stack_Usage1_Pkg; use Stack_Usage1_Pkg;
procedure Stack_Usage1c is
A : Integer := Ident_Int (123);
begin
case A is
when 0 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 1 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 2 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 3 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 4 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 5 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 6 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 7 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 8 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when 9 =>
My_Proc (R'(Ident_Int(0), Ident_Int(1), Ident_Int(2), Ident_Int(3), Ident_Int(4), Ident_Int(5), Ident_Int(6), Ident_Int(7), Ident_Int(8), Ident_Int(9)));
when others =>
null;
end case;
end Stack_Usage1c;
-- { dg-final { scan-stack-usage "\t\[0-9\]\[0-9\]\t" { target i?86-*-* x86_64-*-* } } }
-- { dg-final { cleanup-stack-usage } }
Index: cfgexpand.c
===================================================================
--- cfgexpand.c (revision 204742)
+++ cfgexpand.c (working copy)
@@ -1128,6 +1128,12 @@ expand_one_error_var (tree var)
static bool
defer_stack_allocation (tree var, bool toplevel)
{
+ /* Whether the variable is small enough for immediate allocation not to be
+ a problem with regard to the frame size. */
+ bool smallish
+ = (tree_low_cst (DECL_SIZE_UNIT (var), 1)
+ < PARAM_VALUE (PARAM_MIN_SIZE_FOR_STACK_SHARING));
+
/* If stack protection is enabled, *all* stack variables must be deferred,
so that we can re-order the strings to the top of the frame.
Similarly for Address Sanitizer. */
@@ -1139,8 +1145,15 @@ defer_stack_allocation (tree var, bool t
if (DECL_ALIGN (var) > MAX_SUPPORTED_STACK_ALIGNMENT)
return true;
- /* Variables in the outermost scope automatically conflict with
- every other variable. The only reason to want to defer them
+ /* When optimization is enabled, DECL_IGNORED_P variables originally scoped
+ might be detached from their block and appear at toplevel when we reach
+ here. We want to coalesce them with variables from other blocks when
+ the immediate contribution to the frame size would be noticeable. */
+ if (toplevel && optimize > 0 && DECL_IGNORED_P (var) && !smallish)
+ return true;
+
+ /* Variables declared in the outermost scope automatically conflict
+ with every other variable. The only reason to want to defer them
at all is that, after sorting, we can more efficiently pack
small variables in the stack frame. Continue to defer at -O2. */
if (toplevel && optimize < 2)
@@ -1152,9 +1165,7 @@ defer_stack_allocation (tree var, bool t
other hand, we don't want the function's stack frame size to
get completely out of hand. So we avoid adding scalars and
"small" aggregates to the list at all. */
- if (optimize == 0
- && (tree_low_cst (DECL_SIZE_UNIT (var), 1)
- < PARAM_VALUE (PARAM_MIN_SIZE_FOR_STACK_SHARING)))
+ if (optimize == 0 && smallish)
return false;
return true;
@@ -1674,9 +1685,11 @@ expand_used_vars (void)
else if (TREE_STATIC (var) || DECL_EXTERNAL (var))
expand_now = true;
- /* If the variable is not associated with any block, then it
- was created by the optimizers, and could be live anywhere
- in the function. */
+ /* Expand variables not associated with any block now. Those created by
+ the optimizers could be live anywhere in the function. Those that
+ could possibly have been scoped originally and detached from their
+ block will have their allocation deferred so we coalesce them with
+ others when optimization is enabled. */
else if (TREE_USED (var))
expand_now = true;