Hi All,
This fixes the compile time regression noted by Andrew Pinski.
The compilation speed regression was caused by my patch requirig querying
fndecl_abi signifficantly more than previously. This fixes this by caching
the pcs in the machine_function struct.
Regr tested and bootstrapped on aarch64-linux-gnu.
Okay for master?
Alfie
-- >8 --
As aarch64_function_arg_regno_p is a very hot function called many times for a
function it should not call fndecl_abi every time as it is expensive and
unecessasary.
This caches the result and in doing so fixes a regression in compilation speed
introduced by r16-5076-g7197d8062fddc2.
gcc/ChangeLog:
* config/aarch64/aarch64.cc (aarch64_fndecl_abi): New function.
(aarch64_function_arg_regno_p): Update to use aarch64_fndecl_abi.
(aarch64_output_mi_thunk): Likewise.
(aarch64_is_variant_pcs): Likewise.
(aarch64_set_current_function): Update to initialize pcs value.
* config/aarch64/aarch64.h (enum arm_pcs): Move location earlier in
file.
(machine_function) Add pcs value.
---
gcc/config/aarch64/aarch64.cc | 29 +++++++++++++++++++++++++----
gcc/config/aarch64/aarch64.h | 31 +++++++++++++++++--------------
2 files changed, 42 insertions(+), 18 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 60608a19078..2e6e468e62b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -728,6 +728,22 @@ aarch64_merge_string_arguments (tree args, tree old_attr,
return !use_old_attr;
}
+/* Get the PCS for a decl and store the result to avoid needing to do
+ too many calls to fndecl_abi which can be expensive. */
+
+static arm_pcs
+aarch64_fndecl_abi (tree fn)
+{
+ gcc_assert (TREE_CODE (fn) == FUNCTION_DECL);
+ struct function *fun = DECL_STRUCT_FUNCTION (fn);
+ if (!fun)
+ return arm_pcs (fndecl_abi (fn).id ());
+ if (fun->machine->pcs == ARM_PCS_UNKNOWN)
+ fun->machine->pcs = arm_pcs (fndecl_abi (fn).id ());
+
+ return fun->machine->pcs;
+}
+
/* Check whether an 'aarch64_vector_pcs' attribute is valid. */
static tree
@@ -7896,8 +7912,7 @@ function_arg_preserve_none_regno_p (unsigned regno)
bool
aarch64_function_arg_regno_p (unsigned regno)
{
- enum arm_pcs pcs
- = cfun ? (arm_pcs) fndecl_abi (cfun->decl).id () : ARM_PCS_AAPCS64;
+ enum arm_pcs pcs = cfun ? aarch64_fndecl_abi (cfun->decl) : ARM_PCS_AAPCS64;
switch (pcs)
{
@@ -10764,7 +10779,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk
ATTRIBUTE_UNUSED,
funexp = XEXP (DECL_RTL (function), 0);
funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
auto isa_mode = aarch64_fntype_isa_mode (TREE_TYPE (function));
- auto pcs_variant = arm_pcs (fndecl_abi (function).id ());
+ auto pcs_variant = aarch64_fndecl_abi (function);
bool ir = lookup_attribute ("indirect_return",
TYPE_ATTRIBUTES (TREE_TYPE (function)));
rtx callee_abi = aarch64_gen_callee_cookie (isa_mode, pcs_variant, ir);
@@ -19735,6 +19750,7 @@ aarch64_init_machine_status (void)
{
struct machine_function *machine;
machine = ggc_cleared_alloc<machine_function> ();
+ machine->pcs = ARM_PCS_UNKNOWN;
return machine;
}
@@ -19891,6 +19907,11 @@ aarch64_set_current_function (tree fndecl)
aarch64_previous_fndecl = fndecl;
+ /* Initialize the PCS value to UNKNOWN. */
+ if (fndecl && TREE_CODE (fndecl) == FUNCTION_DECL)
+ if (function *fn = DECL_STRUCT_FUNCTION (fndecl))
+ fn->machine->pcs = ARM_PCS_UNKNOWN;
+
/* First set the target options. */
cl_target_option_restore (&global_options, &global_options_set,
TREE_TARGET_OPTION (new_tree));
@@ -25648,7 +25669,7 @@ static bool
aarch64_is_variant_pcs (tree fndecl)
{
/* Check for ABIs that preserve more registers than usual. */
- arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id ();
+ arm_pcs pcs = aarch64_fndecl_abi (fndecl);
if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE || pcs ==
ARM_PCS_PRESERVE_NONE)
return true;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 0e596b59744..de0db894c7a 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -984,6 +984,20 @@ extern enum aarch64_cpu aarch64_tune;
#define DEFAULT_PCC_STRUCT_RETURN 0
+/* The set of available Procedure Call Stardards. */
+
+enum arm_pcs
+{
+ ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */
+ ARM_PCS_SIMD, /* For aarch64_vector_pcs functions. */
+ ARM_PCS_SVE, /* For functions that pass or return
+ values in SVE registers. */
+ ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */
+ ARM_PCS_PRESERVE_NONE, /* PCS variant with no call-preserved
+ registers except X29. */
+ ARM_PCS_UNKNOWN
+};
+
#if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H)
struct GTY (()) aarch64_frame
{
@@ -1151,6 +1165,9 @@ typedef struct GTY (()) machine_function
/* During SEH output, this is non-null. */
struct seh_frame_state * GTY ((skip (""))) seh;
+
+ /* The Procedure Call Standard for the function. */
+ enum arm_pcs pcs;
} machine_function;
#endif
#endif
@@ -1168,20 +1185,6 @@ enum aarch64_abi_type
#define TARGET_ILP32 (aarch64_abi & AARCH64_ABI_ILP32)
-enum arm_pcs
-{
- ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */
- ARM_PCS_SIMD, /* For aarch64_vector_pcs functions. */
- ARM_PCS_SVE, /* For functions that pass or return
- values in SVE registers. */
- ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */
- ARM_PCS_PRESERVE_NONE, /* PCS variant with no call-preserved
- registers except X29. */
- ARM_PCS_UNKNOWN
-};
-
-
-
/* We can't use machine_mode inside a generator file because it
hasn't been created yet; we shouldn't be using any code that
--
2.34.1