On 06/11/2025 09:51, Tamar Christina wrote:
-----Original Message----- From: Alfie Richards <[email protected]> Sent: 06 November 2025 09:47 To: Tamar Christina <[email protected]>; [email protected] Cc: Richard Earnshaw <[email protected]>; [email protected]; Alice Carlotti <[email protected]>; Alex Coplan <[email protected]>; Wilco Dijkstra <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected] Subject: Re: [PATCH v2] aarch64: Add support for preserve_none function attribute [PR target/118328]The 11/06/2025 08:00, Tamar Christina wrote:Hi Alfie,-----Original Message----- From: Alfie Richards <[email protected]> Sent: 23 October 2025 14:31 To: [email protected] Cc: Richard Earnshaw <[email protected]>; Tamar Christina <[email protected]>; [email protected]; Alice Carlotti <[email protected]>; Alex Coplan <[email protected]>; Wilco Dijkstra <[email protected]>; [email protected]; [email protected]; [email protected]; [email protected]; Alfie Richards<[email protected]>Subject: [PATCH v2] aarch64: Add support for preserve_none function attribute [PR target/118328] Hi all, Updated the documentation part of this patch after some feedback. Functional code unchanged. Reg tested on AArch64. Ok for master? Alfie -- >8 -- When applied to a function preserve_none changes the procedure call standard such that all registers except stack pointer, frame register, and link register are caller saved. Additionally, changes the argument passing registers. PR target/118328 gcc/ChangeLog: * config/aarch64/aarch64.cc (handle_aarch64_vector_pcs_attribute): Add handling for ARM_PCS_PRESERVE_NONE. (aarch64_pcs_exclusions): New definition. (aarch64_gnu_attributes): Add entry for preserve_none and add aarch64_pcs_exclusions to aarch64_vector_pcs entry. (aarch64_preserve_none_abi): New function. (aarch64_fntype_abi): Add handling for preserve_none. (aarch64_reg_save_mode): Add handling for ARM_PCS_PRESERVE_NONE. (aarch64_hard_regno_call_part_clobbered): Add handling for ARM_PCS_PRESERVE_NONE. 
(num_pcs_arg_regs): New helper function. (get_pcs_arg_reg): New helper function. (aarch64_function_ok_for_sibcall): Add handling for ARM_PCS_PRESERVE_NONE. (aarch64_layout_arg): Add preserve_none argument lauout.. (function_arg_preserve_none_regno_p): New helper function. (aarch64_function_arg): Update to handle preserve_none. (function_arg_preserve_none_regno_p): Update logic for preserve_none. (aarch64_expand_builtin_va_start): Add preserve_none layout. (aarch64_setup_incoming_varargs): Add preserve_none layout. (aarch64_is_variant_pcs): Update for case of ARM_PCS_PRESERVE_NONE. (aarch64_comp_type_attributes): Add preserve_none. * config/aarch64/aarch64.h (NUM_PRESERVE_NONE_ARG_REGS): New macro. (PRESERVE_NONE_REGISTERS): New macro. (enum arm_pcs): Add ARM_PCS_PRESERVE_NONE. * doc/extend.texi (preserve_none): Add docs for new attribute. gcc/testsuite/ChangeLog: * gcc.target/aarch64/preserve_none_1.c: New test. * gcc.target/aarch64/preserve_none_2.c: New test. * gcc.target/aarch64/preserve_none_3.c: New test. * gcc.target/aarch64/preserve_none_4.c: New test. * gcc.target/aarch64/preserve_none_5.c: New test. * gcc.target/aarch64/preserve_none_6.c: New test. 
--- gcc/config/aarch64/aarch64.cc | 178 ++++++++++++++++-- gcc/config/aarch64/aarch64.h | 13 ++ gcc/doc/extend.texi | 18 ++ .../gcc.target/aarch64/preserve_none_1.c | 142 ++++++++++++++ .../gcc.target/aarch64/preserve_none_2.c | 49 +++++ .../gcc.target/aarch64/preserve_none_3.c | 109 +++++++++++ .../gcc.target/aarch64/preserve_none_4.c | 93 +++++++++ .../gcc.target/aarch64/preserve_none_5.c | 45 +++++ .../gcc.target/aarch64/preserve_none_6.c | 66 +++++++ 9 files changed, 693 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/preserve_none_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/preserve_none_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/preserve_none_3.c create mode 100644 gcc/testsuite/gcc.target/aarch64/preserve_none_4.c create mode 100644 gcc/testsuite/gcc.target/aarch64/preserve_none_5.c create mode 100644 gcc/testsuite/gcc.target/aarch64/preserve_none_6.c diff --git a/gcc/config/aarch64/aarch64.ccb/gcc/config/aarch64/aarch64.ccindex b86064148fe..5bab2807c2a 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -749,6 +749,8 @@ handle_aarch64_vector_pcs_attribute (tree*node,tree name, tree, *no_add_attrs = true; return NULL_TREE; + /* Rely on the exclusions list for preserve_none. */ + case ARM_PCS_PRESERVE_NONE: case ARM_PCS_TLSDESC: case ARM_PCS_UNKNOWN: break; @@ -851,6 +853,16 @@ handle_arm_shared (tree *node, tree name, tree args, return NULL_TREE; } +/* Mutually-exclusive function type attributes for various PCS variants. 
*/ +static const struct attribute_spec::exclusions aarch64_pcs_exclusions[] = +{ + /* Attribute name exclusion applies to: + function, type, variable */ + { "aarch64_vector_pcs", false, true, false }, + { "preserve_none", false, true, false }, + { NULL, false, false, false } +}; + /* Mutually-exclusive function type attributes for controlling PSTATE.SM.*/static const struct attribute_spec::exclusions attr_streaming_exclusions[] = { @@ -867,7 +879,10 @@ static const attribute_spec aarch64_gnu_attributes[] = /* { name, min_len, max_len, decl_req, type_req, fn_type_req, affects_type_identity, handler, exclude } */ { "aarch64_vector_pcs", 0, 0, false, true, true, true, - handle_aarch64_vector_pcs_attribute, NULL }, + handle_aarch64_vector_pcs_attribute, + aarch64_pcs_exclusions }, + { "preserve_none", 0, 0, false, true, true, true, NULL, + aarch64_pcs_exclusions }, { "indirect_return", 0, 0, false, true, true, true, NULL, NULL }, { "arm_sve_vector_bits", 1, 1, false, true, false, true, aarch64_sve::handle_arm_sve_vector_bits_attribute, @@ -1317,6 +1332,23 @@ aarch64_sve_abi (void) return sve_abi; } +/* Return the descriptor of the preserve_none PCS. */ + +static const predefined_function_abi & +aarch64_preserve_none_abi (void) +{ + auto &preserve_none_abi = function_abis[ARM_PCS_PRESERVE_NONE]; + if (!preserve_none_abi.initialized_p ()) + { + HARD_REG_SET preserved_regs = {}; + if (!CALL_USED_X18) + SET_HARD_REG_BIT (preserved_regs, R18_REGNUM); + auto full_reg_clobbers = reg_class_contents[ALL_REGS] & ~preserved_regs; + preserve_none_abi.initialize (ARM_PCS_PRESERVE_NONE, full_reg_clobbers); + } + return preserve_none_abi; +} + /* If X is an UNSPEC_SALT_ADDR expression, return the address that it wraps, otherwise return X itself. 
*/ @@ -2312,6 +2344,9 @@ aarch64_fntype_abi (const_tree fntype) if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype))) return aarch64_simd_abi (); + if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (fntype))) + return aarch64_preserve_none_abi (); + if (aarch64_returns_value_in_sve_regs_p (fntype) || aarch64_takes_arguments_in_sve_regs_p (fntype)) return aarch64_sve_abi (); @@ -2519,6 +2554,9 @@ aarch64_reg_save_mode (unsigned int regno) if (FP_REGNUM_P (regno)) switch (crtl->abi->id ()) { + case ARM_PCS_PRESERVE_NONE: + /* We should never save FPRs for preserve_none, but nevertheless + treat it like the base PCS for consistency. */This comment is a bit confusing to me as the documentation you addedstatesthat FPRs are saved as callee saves by design. So in that sense addingpreservenone here makes sense, but the comment is confusing.Ah yeah thats from the original prototype by Richard S and I didn't check properly.
Apologies here btw, I replied too quickly without properly thinking. This comment is actually correct. preserve_none changes all the registers (including FP and SIMD) to be caller saved (excluding LR and SP).
As this code gets the mode for storing callee-saved registers, its return value should not matter for a preserve_none function.
case ARM_PCS_AAPCS64: /* Only the low 64 bits are saved by the base PCS. */ return DFmode; @@ -2649,7 +2687,9 @@ aarch64_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno, machine_mode mode) { - if (FP_REGNUM_P (regno) && abi_id != ARM_PCS_SVE) + if (FP_REGNUM_P (regno) + && abi_id != ARM_PCS_SVE + && abi_id != ARM_PCS_PRESERVE_NONE) { poly_int64 per_register_size = GET_MODE_SIZE (mode); unsigned int nregs = hard_regno_nregs (regno, mode); @@ -6826,6 +6866,10 @@ aarch64_function_ok_for_sibcall (tree, treeexp)auto from_abi = crtl->abi->id (); auto to_abi = expr_callee_abi (exp).id (); + /* preserve_none functions can tail-call anything that the base PCS can.*/+ if (from_abi != to_abi && from_abi == ARM_PCS_PRESERVE_NONE) + from_abi = ARM_PCS_AAPCS64; + /* ARM_PCS_SVE preserves strictly more than ARM_PCS_SIMD, which in turn preserves strictly more than the base PCS. The callee must preserve everything that the caller is required to preserve. */ @@ -7287,6 +7331,49 @@ bitint_or_aggr_of_bitint_p (tree type) return false; } +/* How many GPR are available for argument passing in the procedure call + standard. */ +static int +num_pcs_arg_regs (enum arm_pcs pcs) +{ + switch (pcs) + { + case ARM_PCS_PRESERVE_NONE: + return NUM_PRESERVE_NONE_ARG_REGS; + case ARM_PCS_AAPCS64: + case ARM_PCS_SIMD: + case ARM_PCS_SVE: + case ARM_PCS_TLSDESC: + case ARM_PCS_UNKNOWN: + return NUM_ARG_REGS; + } + gcc_unreachable (); +} + +/* Get the NUM'th GPR argument passing register from the PCSprocedurecall + * standard. 
*/ + +static int +get_pcs_arg_reg (enum arm_pcs pcs, int num) +{ + static const int ARM_PCS_PRESERVE_NONE_REGISTERS[] = PRESERVE_NONE_REGISTERS; + + gcc_assert (num < num_pcs_arg_regs (pcs)); + + switch (pcs) + { + case ARM_PCS_PRESERVE_NONE: + return ARM_PCS_PRESERVE_NONE_REGISTERS[num]; + case ARM_PCS_AAPCS64: + case ARM_PCS_SIMD: + case ARM_PCS_SVE: + case ARM_PCS_TLSDESC: + case ARM_PCS_UNKNOWN: + return R0_REGNUM + num; + } + gcc_unreachable (); +} + /* Layout a function argument according to the AAPCS64 rules. The rule numbers refer to the rule numbers in the AAPCS64. ORIG_MODE is the mode that was originally given to us by the target hook, whereas the @@ -7385,7 +7472,9 @@ aarch64_layout_arg (cumulative_args_tpcum_v,const function_arg_info &arg) unprototyped function. There is no ABI-defined location we can return in this case, so we have no real choice but to raise an error immediately, even though this is only a query function. */ - if (arg.named && pcum->pcs_variant != ARM_PCS_SVE) + if (arg.named + && pcum->pcs_variant != ARM_PCS_SVE + && pcum->pcs_variant != ARM_PCS_PRESERVE_NONE) { gcc_assert (!pcum->silent_p); error ("SVE type %qT cannot be passed to an unprototyped function", @@ -7400,7 +7489,6 @@ aarch64_layout_arg (cumulative_args_tpcum_v,const function_arg_info &arg) pcum->aapcs_nextnvrn = pcum->aapcs_nvrn + pst_info.num_zr (); pcum->aapcs_nextnprn = pcum->aapcs_nprn + pst_info.num_pr (); gcc_assert (arg.named - && pcum->pcs_variant == ARM_PCS_SVE && pcum->aapcs_nextnvrn <= NUM_FP_ARG_REGS && pcum->aapcs_nextnprn <= NUM_PR_ARG_REGS); pcum->aapcs_reg = pst_info.get_rtx (mode, V0_REGNUM + pcum-aapcs_nvrn,@@ -7514,7 +7602,7 @@ aarch64_layout_arg (cumulative_args_tpcum_v,const function_arg_info &arg) /* C6 - C9. though the sign and zero extension semantics are handled elsewhere. This is the case where the argument fits entirely general registers. 
*/ - if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS)) + if (allocate_ncrn && (ncrn + nregs <= num_pcs_arg_regs (pcum-pcs_variant))){ gcc_assert (nregs == 0 || nregs == 1 || nregs == 2); @@ -7550,7 +7638,7 @@ aarch64_layout_arg (cumulative_args_tpcum_v,const function_arg_info &arg) inform (input_location, "parameter passing for argument of type " "%qT changed in GCC 9.1", type); ++ncrn; - gcc_assert (ncrn + nregs <= NUM_ARG_REGS); + gcc_assert (ncrn + nregs <= num_pcs_arg_regs (pcum-pcs_variant));} } @@ -7572,7 +7660,8 @@ aarch64_layout_arg (cumulative_args_tpcum_v,const function_arg_info &arg) if (nregs == 0 || (nregs == 1 && !sve_p) || GET_MODE_CLASS (mode) == MODE_INT) - pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn); + pcum->aapcs_reg + = gen_rtx_REG (mode, get_pcs_arg_reg (pcum->pcs_variant, ncrn)); else { rtx par; @@ -7584,7 +7673,8 @@ aarch64_layout_arg (cumulative_args_tpcum_v,const function_arg_info &arg) scalar_int_mode reg_mode = word_mode; if (nregs == 1) reg_mode = int_mode_for_mode (mode).require (); - rtx tmp = gen_rtx_REG (reg_mode, R0_REGNUM + ncrn + i); + int reg = get_pcs_arg_reg (pcum->pcs_variant, ncrn + i); + rtx tmp = gen_rtx_REG (reg_mode, reg); tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, GEN_INT (i * UNITS_PER_WORD)); XVECEXP (par, 0, i) = tmp; @@ -7597,7 +7687,7 @@ aarch64_layout_arg (cumulative_args_tpcum_v,const function_arg_info &arg) } /* C.11 */ - pcum->aapcs_nextncrn = NUM_ARG_REGS; + pcum->aapcs_nextncrn = num_pcs_arg_regs (pcum->pcs_variant); /* The argument is passed on stack; record the needed number of wordsforthis argument and align the total size if necessary. 
*/ @@ -7675,7 +7765,8 @@ aarch64_function_arg (cumulative_args_tpcum_v,const function_arg_info &arg) CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64 || pcum->pcs_variant == ARM_PCS_SIMD - || pcum->pcs_variant == ARM_PCS_SVE); + || pcum->pcs_variant == ARM_PCS_SVE + || pcum->pcs_variant == ARM_PCS_PRESERVE_NONE); if (arg.end_marker_p ()) { @@ -7767,7 +7858,8 @@ aarch64_function_arg_advance (cumulative_args_t pcum_v, CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); if (pcum->pcs_variant == ARM_PCS_AAPCS64 || pcum->pcs_variant == ARM_PCS_SIMD - || pcum->pcs_variant == ARM_PCS_SVE) + || pcum->pcs_variant == ARM_PCS_SVE + || pcum->pcs_variant == ARM_PCS_PRESERVE_NONE) { aarch64_layout_arg (pcum_v, arg); gcc_assert ((pcum->aapcs_reg != NULL_RTX) @@ -7786,13 +7878,41 @@ aarch64_function_arg_advance (cumulative_args_t pcum_v, } } -bool -aarch64_function_arg_regno_p (unsigned regno) +/* Checks if a register is live at entry of a preserve_none pcs function. + That is, it used for passing registers. See ARM_PCS_PRESERVE_NONE_REGISTERS + for full list and order of argument passing registers. */ + +static bool +function_arg_preserve_none_regno_p (unsigned regno) { - return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS) + return ((GP_REGNUM_P (regno) && regno != R8_REGNUM && regno != R15_REGNUM + && regno != R16_REGNUM && regno != R17_REGNUM && regno != R18_REGNUM + && regno != R19_REGNUM && regno != R29_REGNUM && regno != R30_REGNUM) || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS) || (PR_REGNUM_P (regno) && regno < P0_REGNUM + NUM_PR_ARG_REGS)); } +/* Implements FUNCTION_ARG_REGNO_P. */ +bool +aarch64_function_arg_regno_p (unsigned regno) +{ + enum arm_pcs pcs + = cfun ? 
(arm_pcs) fndecl_abi (cfun->decl).id () : ARM_PCS_AAPCS64; + + switch (pcs) + { + case ARM_PCS_AAPCS64: + case ARM_PCS_SIMD: + case ARM_PCS_SVE: + case ARM_PCS_TLSDESC: + case ARM_PCS_UNKNOWN: + return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS) + || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS) + || (PR_REGNUM_P (regno) && regno < P0_REGNUM + NUM_PR_ARG_REGS)); + case ARM_PCS_PRESERVE_NONE: + return function_arg_preserve_none_regno_p (regno); + } + gcc_unreachable (); +} /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least PARM_BOUNDARY bits of alignment, but will be given anything up @@ -21804,8 +21924,9 @@ aarch64_expand_builtin_va_start (treevalist, rtxnextarg ATTRIBUTE_UNUSED) cum = &crtl->args.info; if (cfun->va_list_gpr_size) - gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD, - cfun->va_list_gpr_size); + gr_save_area_size = MIN ((num_pcs_arg_regs (cum->pcs_variant) + - cum->aapcs_ncrn) + * UNITS_PER_WORD, cfun->va_list_gpr_size); if (cfun->va_list_fpr_size) vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG, cfun->va_list_fpr_size); @@ -22190,7 +22311,8 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, /* Found out how many registers we need to save. Honor tree-stdvar analysis results. */ if (cfun->va_list_gpr_size) - gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn, + gr_saved = MIN (num_pcs_arg_regs (local_cum.pcs_variant) + - local_cum.aapcs_ncrn, cfun->va_list_gpr_size / UNITS_PER_WORD); if (cfun->va_list_fpr_size) vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn, @@ -22214,8 +22336,22 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, mem = gen_frame_mem (BLKmode, ptr); set_mem_alias_set (mem, get_varargs_alias_set ()); - move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM, - mem, gr_saved); + /* For preserve_none pcs we can't use move_block_from_reg as the + argument passing register order is not consecutive. 
*/ + if (local_cum.pcs_variant == ARM_PCS_PRESERVE_NONE) + { + for (int i = 0; i < gr_saved; ++i) + { + rtx tem = operand_subword (mem, i, 1, BLKmode); + gcc_assert (tem); + int reg = get_pcs_arg_reg (local_cum.pcs_variant, + local_cum.aapcs_ncrn + i); + emit_move_insn (tem, gen_rtx_REG (word_mode, reg)); + } + } + else + move_block_from_reg (R0_REGNUM + local_cum.aapcs_ncrn, mem, + gr_saved); } if (vr_saved > 0) { @@ -25521,7 +25657,7 @@ aarch64_is_variant_pcs (tree fndecl) { /* Check for ABIs that preserve more registers than usual. */ arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id (); - if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE) + if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE || pcs == ARM_PCS_PRESERVE_NONE) return true; /* Check for ABIs that allow PSTATE.SM to be 1 on entry. */ @@ -30252,6 +30388,8 @@ aarch64_comp_type_attributes (const_tree type1, const_tree type2) if (!check_attr ("gnu", "aarch64_vector_pcs")) return 0; + if (!check_attr ("gnu", "preserve_none")) + return 0; if (!check_attr ("gnu", "indirect_return")) return 0; if (!check_attr ("gnu", "Advanced SIMD type")) diff --git a/gcc/config/aarch64/aarch64.hb/gcc/config/aarch64/aarch64.hindex 2cd929d83f9..79528696da0 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -696,6 +696,17 @@ through +ssve-fp8dot2. */ #define NUM_FP_ARG_REGS 8 #define NUM_PR_ARG_REGS 4 +/* The argument passing regs for preserve_none pcs. */ +#define NUM_PRESERVE_NONE_ARG_REGS 23 +#define PRESERVE_NONE_REGISTERS \ +{ \ + R20_REGNUM, R21_REGNUM, R22_REGNUM, R23_REGNUM, R24_REGNUM, R25_REGNUM,\ + R26_REGNUM, R27_REGNUM, R28_REGNUM,\ + R0_REGNUM, R1_REGNUM, R2_REGNUM, R3_REGNUM, R4_REGNUM, R5_REGNUM,\ + R6_REGNUM, R7_REGNUM,\ + R10_REGNUM, R11_REGNUM, R12_REGNUM, R13_REGNUM, R14_REGNUM, R9_REGNUM\ +} +According to the LLVM documentation on this[1], R15 is also an argumentpassingregister on non-Windows. So I think it should be here conditionally? 
[1] https://clang.llvm.org/docs/AttributeReference.html#preserve-noneAh thank you for catching that. My mistake. Will fix./* A Homogeneous Floating-Point or Short-Vector Aggregate may have at most four members. */ #define HA_MAX_NUM_FLDS 4 @@ -1150,6 +1161,8 @@ enum arm_pcs ARM_PCS_SVE, /* For functions that pass or return values in SVE registers. */ ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */ + ARM_PCS_PRESERVE_NONE, /* PCS variant with no call-preserved + registers except X29. */ ARM_PCS_UNKNOWN }; diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index fb117f59665..6643c00e11e 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -3930,6 +3930,24 @@ threads, such as the POSIX@code{swapcontext}function. This attribute adds a @code{BTI J} instruction when BTI is enabled e.g. via @option{-mbranch-protection}. +@cindex @code{preserve_none} function attribute, AArch64 +@item preserve_none +Use this attribute to change the procedure call standard of the specified +function to the preserve-none variant. + +The preserve-none ABI variant modifies the AAPCS such that has no +callee-saved registers (including SIMD and floating-point registers). +That is, all registers, except for stack register, +link register (r30), and frame pointer (r29), are shifted to be caller saved, +and can be used as scratch registers by the callee. +The LLVM documentation says they are callee saved though, not caller.The clang documentation says "So all general registers are caller saved registers." which matches here (though I need to update the language for fp/simd registers).But the AArch64 specific documentation below that states: On AArch64, only LR and FP are preserved by the callee. So there's an exclusion for AArch64 isn't there?
I don't believe so. Caller saved and callee saved are opposites, so saying "general registers are caller saved" and "only LR and SP are callee saved" is basically equivalent.
Thanks, Alfie
Thanks, TamarIt also deifinitely should be making registers caller saved either way.So one of these seem odd.+Additionally, registers r20--r28, r0--r7, r10--r14, and finally r9 are usedfor+argument passing, in that order. The return value registers remain r0 andr1.+r15 is also an argument passing on non-Windows isn't it?Will fix.Thanks, Tamar+All other details are the same as for the AAPCS ABI. + +This ABI has not been stabilized, and may be subject to change in future +versions. @end table The above target attributes can be specified as follows: diff --git a/gcc/testsuite/gcc.target/aarch64/preserve_none_1.c b/gcc/testsuite/gcc.target/aarch64/preserve_none_1.c new file mode 100644 index 00000000000..a411af23256 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/preserve_none_1.c @@ -0,0 +1,142 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +void normal_callee(); +void preserve_none_callee() [[gnu::preserve_none]]; + +#pragma GCC target "+sve" + +/* +** preserve_none_caller1: +** ?#APP +** nop +** ?#NO_APP +** ret +*/ +void preserve_none_caller1() [[gnu::preserve_none]] +{ + asm volatile ("nop" ::: "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", + + "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", + "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", + "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", + "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", + + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); +} + +/* +** preserve_none_caller2: +** stp x29, x30, \[sp, #?-16\]! 
+** mov x29, sp +** bl normal_callee +** mov w0, w20 +** ldp x29, x30, \[sp\], #?16 +** ret +*/ +int preserve_none_caller2(int x) [[gnu::preserve_none]] +{ + normal_callee(); + return x; +} + +/* +** preserve_none_caller3: +** stp x29, x30, \[sp, #?-32\]! +** mov x29, sp +** str w20, \[sp, #?[0-9]+\] +** bl preserve_none_callee +** ldr w0, \[sp, #?[0-9]+\] +** ldp x29, x30, \[sp\], #?32 +** ret +*/ +int preserve_none_caller3(int x) [[gnu::preserve_none]] +{ + preserve_none_callee(); + return x; +} + +/* +** preserve_none_caller4: +** b preserve_none_callee +*/ +void preserve_none_caller4() [[gnu::preserve_none]] +{ + preserve_none_callee(); +} + +/* +** preserve_none_caller5: +** b preserve_none_callee +*/ +void preserve_none_caller5(__SVBool_t x) [[gnu::preserve_none]] +{ + preserve_none_callee(); +} + +/* +** normal_caller1: +** stp x29, x30, \[sp, #?-160\]! +** mov x29, sp +** stp x19, x20, \[sp, #?16\] +** stp x21, x22, \[sp, #?32\] +** stp x23, x24, \[sp, #?48\] +** stp x25, x26, \[sp, #?64\] +** stp x27, x28, \[sp, #?80\] +** stp d8, d9, \[sp, #?96\] +** stp d10, d11, \[sp, #?112\] +** stp d12, d13, \[sp, #?128\] +** stp d14, d15, \[sp, #?144\] +** bl preserve_none_callee +** ldp d8, d9, \[sp, #?96\] +** ldp d10, d11, \[sp, #?112\] +** ldp d12, d13, \[sp, #?128\] +** ldp d14, d15, \[sp, #?144\] +** ldp x19, x20, \[sp, #?16\] +** ldp x21, x22, \[sp, #?32\] +** ldp x23, x24, \[sp, #?48\] +** ldp x25, x26, \[sp, #?64\] +** ldp x27, x28, \[sp, #?80\] +** ldp x29, x30, \[sp\], #?160 +** ret +*/ +void normal_caller1() +{ + preserve_none_callee(); +} + +/* +** normal_caller2: +** stp x29, x30, \[sp, #?-160\]! 
+** mov x29, sp +** stp x19, x20, \[sp, #?16\] +** stp x21, x22, \[sp, #?32\] +** stp x23, x24, \[sp, #?48\] +** stp x25, x26, \[sp, #?64\] +** stp x27, x28, \[sp, #?80\] +** stp d8, d9, \[sp, #?96\] +** stp d10, d11, \[sp, #?112\] +** stp d12, d13, \[sp, #?128\] +** stp d14, d15, \[sp, #?144\] +** blr x0 +** ldp d8, d9, \[sp, #?96\] +** ldp d10, d11, \[sp, #?112\] +** ldp d12, d13, \[sp, #?128\] +** ldp d14, d15, \[sp, #?144\] +** ldp x19, x20, \[sp, #?16\] +** ldp x21, x22, \[sp, #?32\] +** ldp x23, x24, \[sp, #?48\] +** ldp x25, x26, \[sp, #?64\] +** ldp x27, x28, \[sp, #?80\] +** ldp x29, x30, \[sp\], #?160 +** ret +*/ +void normal_caller2(void (*callee)() [[gnu::preserve_none]]) +{ + callee(); +} diff --git a/gcc/testsuite/gcc.target/aarch64/preserve_none_2.c b/gcc/testsuite/gcc.target/aarch64/preserve_none_2.c new file mode 100644 index 00000000000..1bb89e026e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/preserve_none_2.c @@ -0,0 +1,49 @@ +/* { dg-options "" } */ + +void multi1() [[gnu::aarch64_vector_pcs, gnu::preserve_none]]; /* { dg- warning {ignoring attribute 'preserve_none' because it conflicts} } */ +void multi2() [[gnu::preserve_none, gnu::aarch64_vector_pcs]]; /* { dg- warning {ignoring attribute 'aarch64_vector_pcs' because it conflicts} } */ + +void normal_callee(); +void preserve_none_callee() [[gnu::preserve_none]]; +void vector_callee() [[gnu::aarch64_vector_pcs]]; +void sve_callee(__SVBool_t); +void sve_preserve_none_callee(__SVBool_t) [[gnu::preserve_none]]; + +void (*normal_ptr)(); +void (*preserve_none_ptr)() [[gnu::preserve_none]]; +void (*vector_ptr)() [[gnu::aarch64_vector_pcs]]; +void (*sve_ptr)(__SVBool_t); +void (*sve_preserve_none_ptr)(__SVBool_t) [[gnu::preserve_none]]; + +void f() +{ + normal_ptr = normal_callee; + normal_ptr = preserve_none_callee; /* { dg-error {incompatible pointer type} } */ + normal_ptr = vector_callee; /* { dg-error {incompatible pointer type} } */ + normal_ptr = sve_callee; /* { dg-error 
{incompatible pointer type} } */ + normal_ptr = sve_preserve_none_callee; /* { dg-error {incompatiblepointertype} } */ + + preserve_none_ptr = normal_callee; /* { dg-error {incompatible pointer type} } */ + preserve_none_ptr = preserve_none_callee; + preserve_none_ptr = vector_callee; /* { dg-error {incompatible pointertype}} */ + preserve_none_ptr = sve_callee; /* { dg-error {incompatible pointer type}}*/ + preserve_none_ptr = sve_preserve_none_callee; /* { dg-error{incompatiblepointer type} } */ + + vector_ptr = normal_callee; /* { dg-error {incompatible pointer type} } */ + vector_ptr = preserve_none_callee; /* { dg-error {incompatible pointertype}} */ + vector_ptr = vector_callee; + vector_ptr = sve_callee; /* { dg-error {incompatible pointer type} } */ + vector_ptr = sve_preserve_none_callee; /* { dg-error {incompatiblepointertype} } */ + + sve_ptr = normal_callee; /* { dg-error {incompatible pointer type} } */ + sve_ptr = preserve_none_callee; /* { dg-error {incompatible pointer type}}*/ + sve_ptr = vector_callee; /* { dg-error {incompatible pointer type} } */ + sve_ptr = sve_callee; + sve_ptr = sve_preserve_none_callee; /* { dg-error {incompatible pointer type} } */ + + sve_preserve_none_ptr = normal_callee; /* { dg-error {incompatiblepointertype} } */ + sve_preserve_none_ptr = preserve_none_callee; /* { dg-error{incompatiblepointer type} } */ + sve_preserve_none_ptr = vector_callee; /* { dg-error {incompatiblepointertype} } */ + sve_preserve_none_ptr = sve_callee; /* { dg-error {incompatible pointer type} } */ + sve_preserve_none_ptr = sve_preserve_none_callee; +} diff --git a/gcc/testsuite/gcc.target/aarch64/preserve_none_3.c b/gcc/testsuite/gcc.target/aarch64/preserve_none_3.c new file mode 100644 index 00000000000..7a47190687d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/preserve_none_3.c @@ -0,0 +1,109 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -std=gnu23" } */ + +int no_arg_stack_use_callee [[gnu::preserve_none, gnu::noinline, gnu::noipa]] + 
(int a0, int a1, int a2, int a3, int a4, int a5, int a6, + int a7, int a8, int a9, int a10, int a11, int a12, + int a13, int a14, int a15, int a16, int a17, int a18, + int a19, int a20, int a21, int a22) { + /* Clobber all the registers to check they are correctly marked live at the + start. */ + asm volatile ("mov x0, #0;" + "mov x1, #0;" + "mov x2, #0;" + "mov x3, #0;" + "mov x4, #0;" + "mov x5, #0;" + "mov x6, #0;" + "mov x7, #0;" + "mov x8, #0;" + "mov x9, #0;" + "mov x10, #0;" + "mov x11, #0;" + "mov x12, #0;" + "mov x13, #0;" + "mov x14, #0;" + "mov x15, #0;" + "mov x16, #0;" + "mov x17, #0;" + "mov x18, #0;" + "mov x19, #0;" + "mov x20, #0;" + "mov x21, #0;" + "mov x22, #0;" + "mov x23, #0;" + "mov x24, #0;" + "mov x25, #0;" + "mov x26, #0;" + "mov x27, #0;" + "mov x28, #0;" + ::: "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + + return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + a10 + a11 + a12 +a13+ + a14 + a15 + a16 + a17 + a18 + a19 + a20 + a21 + a22; +} + +int arg_stack_use_callee [[gnu::preserve_none, gnu::noinline,gnu::noipa]]+ (int a0, int a1, int a2, int a3, int a4, int a5, int a6, + int a7, int a8, int a9, int a10, int a11, int a12, + int a13, int a14, int a15, int a16, int a17, int a18, + int a19, int a20, int a21, int a22, int a23) { + /* Clobber all the registers to check they are correctly marked live at the + start. 
*/ + asm volatile ("mov x0, #0;" + "mov x1, #0;" + "mov x2, #0;" + "mov x3, #0;" + "mov x4, #0;" + "mov x5, #0;" + "mov x6, #0;" + "mov x7, #0;" + "mov x8, #0;" + "mov x9, #0;" + "mov x10, #0;" + "mov x11, #0;" + "mov x12, #0;" + "mov x13, #0;" + "mov x14, #0;" + "mov x15, #0;" + "mov x16, #0;" + "mov x17, #0;" + "mov x18, #0;" + "mov x19, #0;" + "mov x20, #0;" + "mov x21, #0;" + "mov x22, #0;" + "mov x23, #0;" + "mov x24, #0;" + "mov x25, #0;" + "mov x26, #0;" + "mov x27, #0;" + "mov x28, #0;" + ::: "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + + return a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + a10 + a11 + a12 +a13+ + a14 + a15 + a16 + a17 + a18 + a19 + a20 + a21 + a22 + a23; +} + +int main () { + + int res = no_arg_stack_use_callee (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13,14, + 15, 16, 17, 18, 19, 20, 21, 22); + + if (res != 22 * 23 / 2) + return 1; + + res = arg_stack_use_callee(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23); + + if (res != 23 * 24 / 2) + return 1; + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/preserve_none_4.c b/gcc/testsuite/gcc.target/aarch64/preserve_none_4.c new file mode 100644 index 00000000000..22338c96711 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/preserve_none_4.c @@ -0,0 +1,93 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +int no_arg_stack_use_callee [[gnu::preserve_none, gnu::noinline, gnu::noipa]] + (int a0, int a1, int a2, int a3, int a4, int a5, int a6, + int a7, int a8, int a9, int a10, int a11, int a12, + int a13, int a14, int a15, int a16, int a17, int a18, + int a19, int a20, int a21, int a22); + +/* Check the pcs argument order is correct. 
Should be x20-28, x0-7, x10-14,x9, and that the return arg is x0 */ + +/* +** no_arg_stack_use_caller: +** ... +** mov w9, 22 +** mov w14, 21 +** mov w13, 20 +** mov w12, 19 +** mov w11, 18 +** mov w10, 17 +** mov w7, 16 +** mov w6, 15 +** mov w5, 14 +** mov w4, 13 +** mov w3, 12 +** mov w2, 11 +** mov w1, 10 +** mov w0, 9 +** mov w28, 8 +** mov w27, 7 +** mov w26, 6 +** mov w25, 5 +** mov w24, 4 +** mov w23, 3 +** mov w22, 2 +** mov w21, 1 +** mov w20, 0 +** bl no_arg_stack_use_callee +** add w0, w0, 1 +** ... +*/ +int no_arg_stack_use_caller [[gnu::preserve_none]] () +{ + return no_arg_stack_use_callee (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22) + + 1; +} + +int arg_stack_use_callee [[gnu::preserve_none, gnu::noinline,gnu::noipa]]+ (int a0, int a1, int a2, int a3, int a4, int a5, int a6, + int a7, int a8, int a9, int a10, int a11, int a12, + int a13, int a14, int a15, int a16, int a17, int a18, + int a19, int a20, int a21, int a22, int a23); + +/* +** arg_stack_use_caller: +** ... +** mov w0, 23 +** mov w9, 22 +** mov w14, 21 +** mov w13, 20 +** mov w12, 19 +** mov w11, 18 +** mov w10, 17 +** mov w7, 16 +** mov w6, 15 +** mov w5, 14 +** mov w4, 13 +** mov w3, 12 +** mov w2, 11 +** mov w1, 10 +** mov w28, 8 +** mov w27, 7 +** mov w26, 6 +** mov w25, 5 +** mov w24, 4 +** mov w23, 3 +** mov w22, 2 +** mov w21, 1 +** mov w20, 0 +** str w0, \[sp\] +** mov w0, 9 +** bl arg_stack_use_callee +** add w0, w0, 1 +** ... 
+*/ +int arg_stack_use_caller [[gnu::preserve_none]] () +{ + return arg_stack_use_callee (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23) + + 1; +} diff --git a/gcc/testsuite/gcc.target/aarch64/preserve_none_5.c b/gcc/testsuite/gcc.target/aarch64/preserve_none_5.c new file mode 100644 index 00000000000..87b22646fb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/preserve_none_5.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <stdarg.h> +int foo [[gnu::preserve_none]] (...); + +/* Check the pcs argument order is correct. Should be x20-28, x0-7, x10-14,x9, and that the return arg is x0 */ + +/* +** bar: +** ... +** mov w9, 22 +** mov w14, 21 +** mov w13, 20 +** mov w12, 19 +** mov w11, 18 +** mov w10, 17 +** mov w7, 16 +** mov w6, 15 +** mov w5, 14 +** mov w4, 13 +** mov w3, 12 +** mov w2, 11 +** mov w1, 10 +** mov w0, 9 +** mov w28, 8 +** mov w27, 7 +** mov w26, 6 +** mov w25, 5 +** mov w24, 4 +** mov w23, 3 +** mov w22, 2 +** mov w21, 1 +** mov w20, 0 +** bl foo +** add w0, w0, 1 +** ... +*/ +int bar [[gnu::preserve_none]] () +{ + return foo (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22) + + 1; +} diff --git a/gcc/testsuite/gcc.target/aarch64/preserve_none_6.c b/gcc/testsuite/gcc.target/aarch64/preserve_none_6.c new file mode 100644 index 00000000000..e576df40e77 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/preserve_none_6.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -std=gnu23" } */ + +#include <stdarg.h> +#include <stdio.h> + +int preserve_none_va_func [[gnu::preserve_none, gnu::noinline, gnu::noclone]] (int count, ...) 
{ + asm volatile ("mov x0, #0;" + "mov x1, #0;" + "mov x2, #0;" + "mov x3, #0;" + "mov x4, #0;" + "mov x5, #0;" + "mov x6, #0;" + "mov x7, #0;" + "mov x8, #0;" + "mov x9, #0;" + "mov x10, #0;" + "mov x11, #0;" + "mov x12, #0;" + "mov x13, #0;" + "mov x14, #0;" + "mov x15, #0;" + "mov x16, #0;" + "mov x17, #0;" + "mov x18, #0;" + "mov x19, #0;" + "mov x20, #0;" + "mov x21, #0;" + "mov x22, #0;" + "mov x23, #0;" + "mov x24, #0;" + "mov x25, #0;" + "mov x26, #0;" + "mov x27, #0;" + "mov x28, #0;" + ::: "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + + int sum = 0; + + va_list args; + + va_start (args, count); + for (int i = 0; i < count; i++) + sum += va_arg(args, int); + va_end (args); + + return sum; +} + +int main () { + int res = preserve_none_va_func (22, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,12,13, + 14, 15, 16, 17, 18, 19, 20, 21); + if (res != 22 * 21 / 2) + return 1; + + res = preserve_none_va_func (23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,13,+ 14, 15, 16, 17, 18, 19, 20, 21, 22); + if (res != 23 * 22 / 2) + return 1; + + return 0; +} -- 2.34.1-- Alfie Richards
