On Tue, Sep 23, 2025 at 5:25 AM Andrew Pinski <[email protected]> wrote: > > The call check in optimize_stack_restore is not the same as > what is described in the comment before the function. It has never > been the same even. It has always allowed all nromal builtins but not > target builtins while the comment says no calls. > > This rewrites it to allow the following. > * a restore right before a noreturn is fine to be removed as the noreturn > function will do the restore (or exit the program). > * Internal functions are ok because they will turn into an instruction or 2 > * Still specifically reject alloca and __scrub_leave > * simple or inexpensive builtins (including target builtins)
Sooo .. can we explain in the comment _why_ a call is not OK? > This will both reject some more locations but also accepts more locations due > to the internal and target and noreturn function calls checks. > > Bootstrapped and tested on x86_64-linux-gnu. > > PR tree-optimization/122033 > gcc/ChangeLog: > > * tree-ssa-ccp.cc (optimize_stack_restore): Rewrite the call check. > Update comment in the front to new rules on calls. > * tree.h (fndecl_builtin_alloc_p): New function. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/pr122033-1.c: New test. > * gcc.dg/tree-ssa/pr122033-2.c: New test. > > Signed-off-by: Andrew Pinski <[email protected]> > --- > gcc/testsuite/gcc.dg/tree-ssa/pr122033-1.c | 18 +++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr122033-2.c | 23 ++++++++++++ > gcc/tree-ssa-ccp.cc | 43 ++++++++++++++++------ > gcc/tree.h | 9 +++++ > 4 files changed, 81 insertions(+), 12 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr122033-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr122033-2.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr122033-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr122033-1.c > new file mode 100644 > index 00000000000..4ef8c6c3150 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr122033-1.c > @@ -0,0 +1,18 @@ > +/* PR middle-end/122033 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > + > +void bar1 (char *, int); > +void bar3(void) __attribute__((noreturn)); > +void foo1 (int size) > +{ > + { > + char temp[size]; > + temp[size-1] = '\0'; > + bar1 (temp, size); > + } > + bar3 (); > +} > + > +/* { dg-final { scan-tree-dump-not "__builtin_stack_save" "optimized"} } */ > +/* { dg-final { scan-tree-dump-not "__builtin_stack_restore" "optimized"} } > */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr122033-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr122033-2.c > new file mode 100644 > index 00000000000..f429324f64c > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr122033-2.c > @@ -0,0 +1,23 @@ > +/* PR middle-end/122033 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-optimized" } */ > + > +void g(int*); > +void h(); > +double t; > +void f(int a, int b) > +{ > + { > + int array0[a]; > + { > + int array1[b]; > + g(array0); > + g(array1); > + } > + t = __builtin_sin(t); > + } > + h (); > +} > + > +/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 2 "optimized"} } > */ > +/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 2 > "optimized"} } */ > diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc > index c9ffd2af85c..070289ca9f0 100644 > --- a/gcc/tree-ssa-ccp.cc > +++ b/gcc/tree-ssa-ccp.cc > @@ -3091,7 +3091,9 @@ make_pass_ccp (gcc::context *ctxt) > if there is another __builtin_stack_restore in the same basic > block and no calls or ASM_EXPRs are in between, or if this block's > only outgoing edge is to EXIT_BLOCK and there are no calls or > - ASM_EXPRs after this __builtin_stack_restore. */ > + ASM_EXPRs after this __builtin_stack_restore. > + Note restore right before a noreturn function is not needed. > + And skip some cheap calls that will most likely become an instruction. */ > > static tree > optimize_stack_restore (gimple_stmt_iterator i) > @@ -3111,26 +3113,43 @@ optimize_stack_restore (gimple_stmt_iterator i) > for (gsi_next (&i); !gsi_end_p (i); gsi_next (&i)) > { > stmt = gsi_stmt (i); > - if (gimple_code (stmt) == GIMPLE_ASM) > + if (is_a<gasm*> (stmt)) > return NULL_TREE; > - if (gimple_code (stmt) != GIMPLE_CALL) > + gcall *call = dyn_cast<gcall*>(stmt); > + if (!call) > continue; > > - callee = gimple_call_fndecl (stmt); > + /* We can remove the restore in front of noreturn > + calls. Since the restore will happen either > + via an unwind/longjmp or not at all. */ > + if (gimple_call_noreturn_p (call)) > + break; > + > + /* Internal calls are ok, to bypass > + check first since fndecl will be null. */ > + if (gimple_call_internal_p (call)) > + continue; > + > + callee = gimple_call_fndecl (call); > + /* Non-builtin calls are not ok. */ > if (!callee > - || !fndecl_built_in_p (callee, BUILT_IN_NORMAL) > - /* All regular builtins are ok, just obviously not alloca. */ > - || ALLOCA_FUNCTION_CODE_P (DECL_FUNCTION_CODE (callee)) > - /* Do not remove stack updates before strub leave. */ > - || fndecl_built_in_p (callee, BUILT_IN___STRUB_LEAVE)) > + || !fndecl_built_in_p (callee)) > + return NULL_TREE; > + > + /* Do not remove stack updates before strub leave. */ > + if (fndecl_built_in_p (callee, BUILT_IN___STRUB_LEAVE) > + /* Alloca calls are not ok either. */ > + || fndecl_builtin_alloc_p (callee)) > return NULL_TREE; > > if (fndecl_built_in_p (callee, BUILT_IN_STACK_RESTORE)) > goto second_stack_restore; > - } > > - if (!gsi_end_p (i)) > - return NULL_TREE; > + /* If not a simple or inexpensive builtin, then it is not ok either. */ > + if (!is_simple_builtin (callee) > + && !is_inexpensive_builtin (callee)) > + return NULL_TREE; > + } > > /* Allow one successor of the exit block, or zero successors. */ > switch (EDGE_COUNT (bb->succs)) > diff --git a/gcc/tree.h b/gcc/tree.h > index ce8c778087f..a2f28bbe231 100644 > --- a/gcc/tree.h > +++ b/gcc/tree.h > @@ -7005,6 +7005,15 @@ fndecl_built_in_p (const_tree node, built_in_function > name1, F... names) > name1, names...)); > } > > +/* Returns true if the function decl NODE is an alloca. */ > +inline bool > +fndecl_builtin_alloc_p (const_tree node) > +{ > + if (!fndecl_built_in_p (node, BUILT_IN_NORMAL)) > + return false; > + return ALLOCA_FUNCTION_CODE_P (DECL_FUNCTION_CODE (node)); > +} > + > /* A struct for encapsulating location information about an operator > and the operation built from it. > > -- > 2.43.0 >
