Hi Alfie, (just a head’s up)
This change breaks bootstrap on my darwin development branch. The issue is in aarch64_output_mi_thunk () and it is that - the thunk is referring to a virtual dtor - the compilation of the virtual dtor is “complete” and function.cc <http://function.cc/>:free_after_compilation () is called _before_ the thunk output happens. That resets cfun->machine to nullptr (and then we segv). I am not sure why this fires on Darwin and not on Linux - but it’s possible that the different handling of clones is responsible - since Darwin does not have strong symbol aliases, clones are real copies (but this is speculation, unproven). Anyway, I propose to resolve the issue as below … (at least in my local branch) - open to better solutions, of course no action needed - as said, just a head’s up. thanks Iain > On 12 Dec 2025, at 20:44, Andrew Pinski <[email protected]> > wrote: > > On Tue, Nov 18, 2025 at 9:03 AM Alfie Richards <[email protected]> wrote: >> >> Hi All, >> >> This fixes the compile time regression noted by Andrew Pinski. >> >> The compilation speed regression was caused by my patch requirig querying >> fndecl_abi signifficantly more than previously. This fixes this by caching >> the pcs in the machine_function struct. >> >> Regr tested and bootstrapped on aarch64-linux-gnu. >> >> Okay for master? > > Just to confirm this did fix the regression and thanks again for > looking into the compile time regression. > > links to the graphs: > -O0: https://lnt.opensuse.org/db_default/v4/CPP/graph?plot.0=358.576.8 > -Og: https://lnt.opensuse.org/db_default/v4/CPP/graph?plot.0=349.576.8 > > Thanks, > Andrew Pinski > >> >> Alfie >> >> -- >8 -- >> >> As aarch64_function_arg_regno_p is a very hot function called many times for >> a >> function it should not call fndecl_abi every time as it is expensive and >> unecessasary. >> >> This caches the result and in doing so fixes a regression in compilation >> speed >> introduced by r16-5076-g7197d8062fddc2. >> >> gcc/ChangeLog: >> >> * config/aarch64/aarch64.cc (aarch64_fndecl_abi): New function. >> (aarch64_function_arg_regno_p): Update to use aarch64_fndecl_abi. >> (aarch64_output_mi_thunk): Likewise. >> (aarch64_is_variant_pcs): Likewise. >> (aarch64_set_current_function): Update to initialize pcs value. >> * config/aarch64/aarch64.h (enum arm_pcs): Move location earlier in >> file. >> (machine_function) Add pcs value. >> --- >> gcc/config/aarch64/aarch64.cc | 29 +++++++++++++++++++++++++---- >> gcc/config/aarch64/aarch64.h | 31 +++++++++++++++++-------------- >> 2 files changed, 42 insertions(+), 18 deletions(-) >> >> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc >> index 60608a19078..2e6e468e62b 100644 >> --- a/gcc/config/aarch64/aarch64.cc >> +++ b/gcc/config/aarch64/aarch64.cc >> @@ -728,6 +728,22 @@ aarch64_merge_string_arguments (tree args, tree >> old_attr, >> return !use_old_attr; >> } >> >> +/* Get the PCS for a decl and store the result to avoid needing to do >> + too many calls to fndecl_abi which can be expensive. */ >> + >> +static arm_pcs >> +aarch64_fndecl_abi (tree fn) >> +{ >> + gcc_assert (TREE_CODE (fn) == FUNCTION_DECL); ^^ I wonder if the asserts could be gcc_checking_asserts() if this is hot code. >> + struct function *fun = DECL_STRUCT_FUNCTION (fn); >> + if (!fun) I am going to make this ^^^ if (!fun || !fun->machine) >> + return arm_pcs (fndecl_abi (fn).id ()); >> + if (fun->machine->pcs == ARM_PCS_UNKNOWN) >> + fun->machine->pcs = arm_pcs (fndecl_abi (fn).id ()); >> + >> + return fun->machine->pcs; >> +} >> + >> /* Check whether an 'aarch64_vector_pcs' attribute is valid. */ >> >> static tree >> @@ -7896,8 +7912,7 @@ function_arg_preserve_none_regno_p (unsigned regno) >> bool >> aarch64_function_arg_regno_p (unsigned regno) >> { >> - enum arm_pcs pcs >> - = cfun ? (arm_pcs) fndecl_abi (cfun->decl).id () : ARM_PCS_AAPCS64; >> + enum arm_pcs pcs = cfun ? aarch64_fndecl_abi (cfun->decl) : >> ARM_PCS_AAPCS64; >> >> switch (pcs) >> { >> @@ -10764,7 +10779,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk >> ATTRIBUTE_UNUSED, >> funexp = XEXP (DECL_RTL (function), 0); >> funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); >> auto isa_mode = aarch64_fntype_isa_mode (TREE_TYPE (function)); >> - auto pcs_variant = arm_pcs (fndecl_abi (function).id ()); >> + auto pcs_variant = aarch64_fndecl_abi (function); >> bool ir = lookup_attribute ("indirect_return", >> TYPE_ATTRIBUTES (TREE_TYPE (function))); >> rtx callee_abi = aarch64_gen_callee_cookie (isa_mode, pcs_variant, ir); >> @@ -19735,6 +19750,7 @@ aarch64_init_machine_status (void) >> { >> struct machine_function *machine; >> machine = ggc_cleared_alloc<machine_function> (); >> + machine->pcs = ARM_PCS_UNKNOWN; >> return machine; >> } >> >> @@ -19891,6 +19907,11 @@ aarch64_set_current_function (tree fndecl) >> >> aarch64_previous_fndecl = fndecl; >> >> + /* Initialize the PCS value to UNKNOWN. */ >> + if (fndecl && TREE_CODE (fndecl) == FUNCTION_DECL) >> + if (function *fn = DECL_STRUCT_FUNCTION (fndecl)) >> + fn->machine->pcs = ARM_PCS_UNKNOWN; >> + >> /* First set the target options. */ >> cl_target_option_restore (&global_options, &global_options_set, >> TREE_TARGET_OPTION (new_tree)); >> @@ -25648,7 +25669,7 @@ static bool >> aarch64_is_variant_pcs (tree fndecl) >> { >> /* Check for ABIs that preserve more registers than usual. */ >> - arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id (); >> + arm_pcs pcs = aarch64_fndecl_abi (fndecl); >> if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE || pcs == >> ARM_PCS_PRESERVE_NONE) >> return true; >> >> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h >> index 0e596b59744..de0db894c7a 100644 >> --- a/gcc/config/aarch64/aarch64.h >> +++ b/gcc/config/aarch64/aarch64.h >> @@ -984,6 +984,20 @@ extern enum aarch64_cpu aarch64_tune; >> >> #define DEFAULT_PCC_STRUCT_RETURN 0 >> >> +/* The set of available Procedure Call Stardards. */ >> + >> +enum arm_pcs >> +{ >> + ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ >> + ARM_PCS_SIMD, /* For aarch64_vector_pcs functions. >> */ >> + ARM_PCS_SVE, /* For functions that pass or return >> + values in SVE registers. */ >> + ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */ >> + ARM_PCS_PRESERVE_NONE, /* PCS variant with no call-preserved >> + registers except X29. */ >> + ARM_PCS_UNKNOWN >> +}; >> + >> #if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H) >> struct GTY (()) aarch64_frame >> { >> @@ -1151,6 +1165,9 @@ typedef struct GTY (()) machine_function >> >> /* During SEH output, this is non-null. */ >> struct seh_frame_state * GTY ((skip (""))) seh; >> + >> + /* The Procedure Call Standard for the function. */ >> + enum arm_pcs pcs; >> } machine_function; >> #endif >> #endif >> @@ -1168,20 +1185,6 @@ enum aarch64_abi_type >> >> #define TARGET_ILP32 (aarch64_abi & AARCH64_ABI_ILP32) >> >> -enum arm_pcs >> -{ >> - ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ >> - ARM_PCS_SIMD, /* For aarch64_vector_pcs functions. >> */ >> - ARM_PCS_SVE, /* For functions that pass or return >> - values in SVE registers. */ >> - ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */ >> - ARM_PCS_PRESERVE_NONE, /* PCS variant with no call-preserved >> - registers except X29. */ >> - ARM_PCS_UNKNOWN >> -}; >> - >> - >> - >> >> /* We can't use machine_mode inside a generator file because it >> hasn't been created yet; we shouldn't be using any code that >> -- >> 2.34.1 >>
