Hello, as my patch for stack-arrays in Fortran reveals, we have a problem: VLA objects basically prevent all inlining from happening. They're transformed into alloca calls, and those are considered to disable inlining. The (correct) fear is that inlining a bare alloca call into a loop leads to unbounded stack growth.
But the situation is different for alloca calls emitted for dealing with VLA objects. They always are wrapped with stack_save/stack_restore calls. Inlining such regions always is okay, even into loops. The stack space usage will be exactly the same at runtime. We have a flag for this already on the CALL_EXPR. But we don't retain it over tuples, and hence we also don't look at it in inline_forbidden_p_stmt. This patch fixes both. (The strange testing of builtin-ness is because in CALL_EXPR the ALLOCA_FOR_VAR_P and CALL_FROM_THUNK_P flags are overloaded) (This fixes the regression of fatigue with the stack-arrays patch) regstrapping on x86_64-linux in progress, okay for trunk? Ciao, Michael. * gimple.h (enum gf_mask): Add GF_CALL_ALLOCA_FOR_VAR. (gimple_call_set_alloca_for_var): New inline function. (gimple_call_alloca_for_var_p): Ditto. * gimple.c (gimple_build_call_from_tree): Remember ALLOCA_FOR_VAR_P state. * cfgexpand.c (expand_call_stmt): Restore ALLOCA_FOR_VAR_P state. * tree-inline.c (inline_forbidden_p_stmt): Don't reject alloca calls if they were for VLA objects. Index: cfgexpand.c =================================================================== *** cfgexpand.c (revision 172431) --- cfgexpand.c (working copy) *************** expand_call_stmt (gimple stmt) *** 1873,1879 **** CALL_EXPR_TAILCALL (exp) = gimple_call_tail_p (stmt); CALL_EXPR_RETURN_SLOT_OPT (exp) = gimple_call_return_slot_opt_p (stmt); ! CALL_FROM_THUNK_P (exp) = gimple_call_from_thunk_p (stmt); CALL_CANNOT_INLINE_P (exp) = gimple_call_cannot_inline_p (stmt); CALL_EXPR_VA_ARG_PACK (exp) = gimple_call_va_arg_pack_p (stmt); SET_EXPR_LOCATION (exp, gimple_location (stmt)); --- 1873,1884 ---- CALL_EXPR_TAILCALL (exp) = gimple_call_tail_p (stmt); CALL_EXPR_RETURN_SLOT_OPT (exp) = gimple_call_return_slot_opt_p (stmt); ! if (decl ! && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL ! && DECL_FUNCTION_CODE (decl) == BUILT_IN_ALLOCA) ! ALLOCA_FOR_VAR_P (exp) = gimple_call_alloca_for_var_p (stmt); ! 
else ! CALL_FROM_THUNK_P (exp) = gimple_call_from_thunk_p (stmt); CALL_CANNOT_INLINE_P (exp) = gimple_call_cannot_inline_p (stmt); CALL_EXPR_VA_ARG_PACK (exp) = gimple_call_va_arg_pack_p (stmt); SET_EXPR_LOCATION (exp, gimple_location (stmt)); Index: tree-inline.c =================================================================== *** tree-inline.c (revision 172431) --- tree-inline.c (working copy) *************** inline_forbidden_p_stmt (gimple_stmt_ite *** 2997,3004 **** this may change program's memory overhead drastically when the function using alloca is called in loop. In GCC present in SPEC2000 inlining into schedule_block cause it to require 2GB of ! RAM instead of 256MB. */ if (gimple_alloca_call_p (stmt) && !lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn))) { inline_forbidden_reason --- 2997,3007 ---- this may change program's memory overhead drastically when the function using alloca is called in loop. In GCC present in SPEC2000 inlining into schedule_block cause it to require 2GB of ! RAM instead of 256MB. Don't do so for alloca calls emitted for ! VLA objects as those can't cause unbounded growth (they're always ! wrapped inside stack_save/stack_restore regions. */ if (gimple_alloca_call_p (stmt) + && !gimple_call_alloca_for_var_p (stmt) && !lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn))) { inline_forbidden_reason Index: gimple.c =================================================================== *** gimple.c (revision 172431) --- gimple.c (working copy) *************** gimple_build_call_from_tree (tree t) *** 303,309 **** gimple_call_set_tail (call, CALL_EXPR_TAILCALL (t)); gimple_call_set_cannot_inline (call, CALL_CANNOT_INLINE_P (t)); gimple_call_set_return_slot_opt (call, CALL_EXPR_RETURN_SLOT_OPT (t)); ! 
gimple_call_set_from_thunk (call, CALL_FROM_THUNK_P (t)); gimple_call_set_va_arg_pack (call, CALL_EXPR_VA_ARG_PACK (t)); gimple_call_set_nothrow (call, TREE_NOTHROW (t)); gimple_set_no_warning (call, TREE_NO_WARNING (t)); --- 303,314 ---- gimple_call_set_tail (call, CALL_EXPR_TAILCALL (t)); gimple_call_set_cannot_inline (call, CALL_CANNOT_INLINE_P (t)); gimple_call_set_return_slot_opt (call, CALL_EXPR_RETURN_SLOT_OPT (t)); ! if (fndecl ! && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL ! && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA) ! gimple_call_set_alloca_for_var (call, ALLOCA_FOR_VAR_P (t)); ! else ! gimple_call_set_from_thunk (call, CALL_FROM_THUNK_P (t)); gimple_call_set_va_arg_pack (call, CALL_EXPR_VA_ARG_PACK (t)); gimple_call_set_nothrow (call, TREE_NOTHROW (t)); gimple_set_no_warning (call, TREE_NO_WARNING (t)); Index: gimple.h =================================================================== *** gimple.h (revision 172431) --- gimple.h (working copy) *************** enum gf_mask { *** 102,107 **** --- 102,108 ---- GF_CALL_TAILCALL = 1 << 3, GF_CALL_VA_ARG_PACK = 1 << 4, GF_CALL_NOTHROW = 1 << 5, + GF_CALL_ALLOCA_FOR_VAR = 1 << 6, GF_OMP_PARALLEL_COMBINED = 1 << 0, /* True on an GIMPLE_OMP_RETURN statement if the return does not require *************** gimple_call_nothrow_p (gimple s) *** 2329,2334 **** --- 2330,2358 ---- return (gimple_call_flags (s) & ECF_NOTHROW) != 0; } + /* If FOR_VAR is true, GIMPLE_CALL S is a call to builtin_alloca that + is known to be emitted for VLA objects. Those are wrapped by + stack_save/stack_restore calls and hence can't lead to unbounded + stack growth even when they occur in loops. */ + + static inline void + gimple_call_set_alloca_for_var (gimple s, bool for_var) + { + GIMPLE_CHECK (s, GIMPLE_CALL); + if (for_var) + s->gsbase.subcode |= GF_CALL_ALLOCA_FOR_VAR; + else + s->gsbase.subcode &= ~GF_CALL_ALLOCA_FOR_VAR; + } + + /* Return true of S is a call to builtin_alloca emitted for VLA objects. 
*/ + + static inline bool + gimple_call_alloca_for_var_p (gimple s) + { + GIMPLE_CHECK (s, GIMPLE_CALL); + return (s->gsbase.subcode & GF_CALL_ALLOCA_FOR_VAR) != 0; + } /* Copy all the GF_CALL_* flags from ORIG_CALL to DEST_CALL. */