[This follows on from: https://gcc.gnu.org/ml/gcc-patches/2019-09/msg00778.html https://gcc.gnu.org/ml/gcc-patches/2019-09/msg01456.html]
At the moment we rely on SYMBOL_REF_DECL to get the ABI of the callee of a call insn, falling back to the default ABI if the decl isn't available. I think it'd be cleaner to attach the ABI directly to the call instruction instead, which would also have the very minor benefit of handling indirect calls more efficiently.

Tested on aarch64-linux-gnu. OK to install?

Richard


2019-09-25  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
	* config/aarch64/aarch64-protos.h (aarch64_expand_call): Take an
	extra callee_abi argument.
	* config/aarch64/aarch64.c (aarch64_expand_call): Likewise.
	Insert a CALLEE_ABI unspec into the call pattern as the second
	element in the PARALLEL.
	(aarch64_simd_call_p): Delete.
	(aarch64_insn_callee_abi): Get the arm_pcs of the callee from
	the new CALLEE_ABI element of the PARALLEL.
	(aarch64_init_cumulative_args): Get the arm_pcs of the callee
	from the function type, if given.
	(aarch64_function_arg_advance): Handle ARM_PCS_SIMD.
	(aarch64_function_arg): Likewise. Return the arm_pcs of the callee
	when passed the function_arg_info end marker.
	(aarch64_output_mi_thunk): Pass the arm_pcs of the callee as the
	final argument of gen_sibcall.
	* config/aarch64/aarch64.md (UNSPEC_CALLEE_ABI): New unspec.
	(call): Make operand 2 a const_int_operand and pass it to expand_call.
	Wrap it in an UNSPEC_CALLEE_ABI unspec for the dummy define_expand
	pattern.
	(call_value): Likewise operand 3.
	(sibcall): Likewise operand 2. Place the unspec before rather than
	after the return.
	(sibcall_value): Likewise operand 3.
	(*call_insn, *call_value_insn): Include an UNSPEC_CALLEE_ABI.
	(tlsgd_small_<mode>, *tlsgd_small_<mode>): Likewise.
	(*sibcall_insn, *sibcall_value_insn): Likewise. Remove empty
	constraint strings.
	(untyped_call): Pass const0_rtx as the callee ABI to gen_call.

gcc/testsuite/
	* gcc.target/aarch64/torture/simd-abi-10.c: New test.
	* gcc.target/aarch64/torture/simd-abi-11.c: Likewise.

Index: gcc/config/aarch64/aarch64-protos.h =================================================================== --- gcc/config/aarch64/aarch64-protos.h 2019-09-25 17:23:36.770504785 +0100 +++ gcc/config/aarch64/aarch64-protos.h 2019-09-25 17:31:04.663257639 +0100 @@ -452,7 +452,7 @@ bool aarch64_const_vec_all_same_in_range bool aarch64_constant_address_p (rtx); bool aarch64_emit_approx_div (rtx, rtx, rtx); bool aarch64_emit_approx_sqrt (rtx, rtx, bool); -void aarch64_expand_call (rtx, rtx, bool); +void aarch64_expand_call (rtx, rtx, rtx, bool); bool aarch64_expand_cpymem (rtx *); bool aarch64_float_const_zero_rtx_p (rtx); bool aarch64_float_const_rtx_p (rtx); Index: gcc/config/aarch64/aarch64.c =================================================================== --- gcc/config/aarch64/aarch64.c 2019-09-25 17:31:04.000000000 +0100 +++ gcc/config/aarch64/aarch64.c 2019-09-25 17:31:04.667257609 +0100 @@ -1872,37 +1872,17 @@ aarch64_reg_save_mode (tree fndecl, unsi : (aarch64_simd_decl_p (fndecl) ? E_TFmode : E_DFmode); } -/* Return true if the instruction is a call to a SIMD function, false - if it is not a SIMD function or if we do not know anything about - the function. */ - -static bool -aarch64_simd_call_p (const rtx_insn *insn) -{ - rtx symbol; - rtx call; - tree fndecl; - - gcc_assert (CALL_P (insn)); - call = get_call_rtx_from (insn); - symbol = XEXP (XEXP (call, 0), 0); - if (GET_CODE (symbol) != SYMBOL_REF) - return false; - fndecl = SYMBOL_REF_DECL (symbol); - if (!fndecl) - return false; - - return aarch64_simd_decl_p (fndecl); -} - /* Implement TARGET_INSN_CALLEE_ABI. 
*/ const predefined_function_abi & aarch64_insn_callee_abi (const rtx_insn *insn) { - if (aarch64_simd_call_p (insn)) - return aarch64_simd_abi (); - return default_function_abi; + rtx pat = PATTERN (insn); + gcc_assert (GET_CODE (pat) == PARALLEL); + rtx unspec = XVECEXP (pat, 0, 1); + gcc_assert (GET_CODE (unspec) == UNSPEC + && XINT (unspec, 1) == UNSPEC_CALLEE_ABI); + return function_abis[INTVAL (XVECEXP (unspec, 0, 0))]; } /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves @@ -4847,10 +4827,11 @@ aarch64_layout_arg (cumulative_args_t pc aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg) { CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); - gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64); + gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64 + || pcum->pcs_variant == ARM_PCS_SIMD); if (arg.end_marker_p ()) - return NULL_RTX; + return gen_int_mode (pcum->pcs_variant, DImode); aarch64_layout_arg (pcum_v, arg.mode, arg.type, arg.named); return pcum->aapcs_reg; @@ -4858,16 +4839,19 @@ aarch64_function_arg (cumulative_args_t void aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, - const_tree fntype ATTRIBUTE_UNUSED, - rtx libname ATTRIBUTE_UNUSED, - const_tree fndecl ATTRIBUTE_UNUSED, - unsigned n_named ATTRIBUTE_UNUSED) + const_tree fntype, + rtx libname ATTRIBUTE_UNUSED, + const_tree fndecl ATTRIBUTE_UNUSED, + unsigned n_named ATTRIBUTE_UNUSED) { pcum->aapcs_ncrn = 0; pcum->aapcs_nvrn = 0; pcum->aapcs_nextncrn = 0; pcum->aapcs_nextnvrn = 0; - pcum->pcs_variant = ARM_PCS_AAPCS64; + if (fntype) + pcum->pcs_variant = (arm_pcs) fntype_abi (fntype).id (); + else + pcum->pcs_variant = ARM_PCS_AAPCS64; pcum->aapcs_reg = NULL_RTX; pcum->aapcs_arg_processed = false; pcum->aapcs_stack_words = 0; @@ -4892,7 +4876,8 @@ aarch64_function_arg_advance (cumulative const function_arg_info &arg) { CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); - if (pcum->pcs_variant == ARM_PCS_AAPCS64) + if (pcum->pcs_variant == 
ARM_PCS_AAPCS64 + || pcum->pcs_variant == ARM_PCS_SIMD) { aarch64_layout_arg (pcum_v, arg.mode, arg.type, arg.named); gcc_assert ((pcum->aapcs_reg != NULL_RTX) @@ -6921,7 +6906,8 @@ aarch64_output_mi_thunk (FILE *file, tre } funexp = XEXP (DECL_RTL (function), 0); funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); - insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX)); + rtx callee_abi = gen_int_mode (fndecl_abi (function).id (), DImode); + insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, callee_abi)); SIBLING_CALL_P (insn) = 1; insn = get_insns (); @@ -7999,11 +7985,12 @@ aarch64_fixed_condition_code_regs (unsig RESULT is the register in which the result is returned. It's NULL for "call" and "sibcall". MEM is the location of the function call. + CALLEE_ABI is a const_int that gives the arm_pcs of the callee. SIBCALL indicates whether this function call is normal call or sibling call. It will generate different pattern accordingly. */ void -aarch64_expand_call (rtx result, rtx mem, bool sibcall) +aarch64_expand_call (rtx result, rtx mem, rtx callee_abi, bool sibcall) { rtx call, callee, tmp; rtvec vec; @@ -8033,7 +8020,11 @@ aarch64_expand_call (rtx result, rtx mem else tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM)); - vec = gen_rtvec (2, call, tmp); + gcc_assert (CONST_INT_P (callee_abi)); + callee_abi = gen_rtx_UNSPEC (DImode, gen_rtvec (1, callee_abi), + UNSPEC_CALLEE_ABI); + + vec = gen_rtvec (3, call, callee_abi, tmp); call = gen_rtx_PARALLEL (VOIDmode, vec); aarch64_emit_call_insn (call); Index: gcc/config/aarch64/aarch64.md =================================================================== --- gcc/config/aarch64/aarch64.md 2019-09-25 17:23:36.774504754 +0100 +++ gcc/config/aarch64/aarch64.md 2019-09-25 17:31:04.667257609 +0100 @@ -130,6 +130,7 @@ (define_c_enum "unspec" [ UNSPEC_AUTIB1716 UNSPEC_AUTIASP UNSPEC_AUTIBSP + UNSPEC_CALLEE_ABI UNSPEC_CASESI UNSPEC_CRC32B UNSPEC_CRC32CB @@ -913,14 +914,15 @@ (define_insn 
"*cb<optab><mode>1" ;; ------------------------------------------------------------------- (define_expand "call" - [(parallel [(call (match_operand 0 "memory_operand") - (match_operand 1 "general_operand")) - (use (match_operand 2 "" "")) - (clobber (reg:DI LR_REGNUM))])] + [(parallel + [(call (match_operand 0 "memory_operand") + (match_operand 1 "general_operand")) + (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) + (clobber (reg:DI LR_REGNUM))])] "" " { - aarch64_expand_call (NULL_RTX, operands[0], false); + aarch64_expand_call (NULL_RTX, operands[0], operands[2], false); DONE; }" ) @@ -928,6 +930,7 @@ (define_expand "call" (define_insn "*call_insn" [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "r, Usf")) (match_operand 1 "" "")) + (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) (clobber (reg:DI LR_REGNUM))] "" "@ @@ -937,15 +940,16 @@ (define_insn "*call_insn" ) (define_expand "call_value" - [(parallel [(set (match_operand 0 "" "") - (call (match_operand 1 "memory_operand") - (match_operand 2 "general_operand"))) - (use (match_operand 3 "" "")) - (clobber (reg:DI LR_REGNUM))])] + [(parallel + [(set (match_operand 0 "") + (call (match_operand 1 "memory_operand") + (match_operand 2 "general_operand"))) + (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) + (clobber (reg:DI LR_REGNUM))])] "" " { - aarch64_expand_call (operands[0], operands[1], false); + aarch64_expand_call (operands[0], operands[1], operands[3], false); DONE; }" ) @@ -954,6 +958,7 @@ (define_insn "*call_value_insn" [(set (match_operand 0 "" "") (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "r, Usf")) (match_operand 2 "" ""))) + (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) (clobber (reg:DI LR_REGNUM))] "" "@ @@ -963,33 +968,36 @@ (define_insn "*call_value_insn" ) (define_expand "sibcall" - [(parallel [(call (match_operand 0 "memory_operand") - (match_operand 1 
"general_operand")) - (return) - (use (match_operand 2 "" ""))])] + [(parallel + [(call (match_operand 0 "memory_operand") + (match_operand 1 "general_operand")) + (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) + (return)])] "" { - aarch64_expand_call (NULL_RTX, operands[0], true); + aarch64_expand_call (NULL_RTX, operands[0], operands[2], true); DONE; } ) (define_expand "sibcall_value" - [(parallel [(set (match_operand 0 "" "") - (call (match_operand 1 "memory_operand") - (match_operand 2 "general_operand"))) - (return) - (use (match_operand 3 "" ""))])] + [(parallel + [(set (match_operand 0 "") + (call (match_operand 1 "memory_operand") + (match_operand 2 "general_operand"))) + (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) + (return)])] "" { - aarch64_expand_call (operands[0], operands[1], true); + aarch64_expand_call (operands[0], operands[1], operands[3], true); DONE; } ) (define_insn "*sibcall_insn" [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf")) - (match_operand 1 "" "")) + (match_operand 1 "")) + (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) (return)] "SIBLING_CALL_P (insn)" "@ @@ -999,10 +1007,11 @@ (define_insn "*sibcall_insn" ) (define_insn "*sibcall_value_insn" - [(set (match_operand 0 "" "") + [(set (match_operand 0 "") (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf")) - (match_operand 2 "" ""))) + (match_operand 2 ""))) + (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) (return)] "SIBLING_CALL_P (insn)" "@ @@ -1022,7 +1031,9 @@ (define_expand "untyped_call" { int i; - emit_call_insn (gen_call (operands[0], const0_rtx, NULL)); + /* Untyped calls always use the default ABI. It's only possible to use + ABI variants if we know the type of the target function. 
*/ + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -6682,6 +6693,7 @@ (define_insn "aarch64_load_tp_hard" (define_expand "tlsgd_small_<mode>" [(parallel [(set (match_operand 0 "register_operand") (call (mem:DI (match_dup 2)) (const_int 1))) + (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI) (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref")] UNSPEC_GOTSMALLTLS) (clobber (reg:DI LR_REGNUM))])] "" @@ -6692,6 +6704,7 @@ (define_expand "tlsgd_small_<mode>" (define_insn "*tlsgd_small_<mode>" [(set (match_operand 0 "register_operand" "") (call (mem:DI (match_operand:DI 2 "" "")) (const_int 1))) + (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI) (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS) (clobber (reg:DI LR_REGNUM)) ] Index: gcc/testsuite/gcc.target/aarch64/torture/simd-abi-10.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/torture/simd-abi-10.c 2019-09-25 17:31:04.667257609 +0100 @@ -0,0 +1,14 @@ +/* { dg-do compile } */ + +int __attribute__((aarch64_vector_pcs)) (*callee) (void); + +int __attribute__ ((aarch64_vector_pcs)) +caller (int *x) +{ + return callee () + 1; +} + +/* { dg-final { scan-assembler-not {\tstp\tq} } } */ +/* { dg-final { scan-assembler-not {\tldp\tq} } } */ +/* { dg-final { scan-assembler-not {\tstr\tq} } } */ +/* { dg-final { scan-assembler-not {\tldr\tq} } } */ Index: gcc/testsuite/gcc.target/aarch64/torture/simd-abi-11.c =================================================================== --- /dev/null 2019-09-17 11:41:18.176664108 +0100 +++ gcc/testsuite/gcc.target/aarch64/torture/simd-abi-11.c 2019-09-25 17:31:04.671257581 +0100 @@ -0,0 +1,26 @@ +/* { dg-do compile } */ + +int (*callee) (void); + +int __attribute__ ((aarch64_vector_pcs)) +caller (int *x) +{ + return callee () + 1; +} + +/* { dg-final { scan-assembler {\sstp\tq8, 
q9} } } */ +/* { dg-final { scan-assembler {\sstp\tq10, q11} } } */ +/* { dg-final { scan-assembler {\sstp\tq12, q13} } } */ +/* { dg-final { scan-assembler {\sstp\tq14, q15} } } */ +/* { dg-final { scan-assembler {\sstp\tq16, q17} } } */ +/* { dg-final { scan-assembler {\sstp\tq18, q19} } } */ +/* { dg-final { scan-assembler {\sstp\tq20, q21} } } */ +/* { dg-final { scan-assembler {\sstp\tq22, q23} } } */ +/* { dg-final { scan-assembler {\sldp\tq8, q9} } } */ +/* { dg-final { scan-assembler {\sldp\tq10, q11} } } */ +/* { dg-final { scan-assembler {\sldp\tq12, q13} } } */ +/* { dg-final { scan-assembler {\sldp\tq14, q15} } } */ +/* { dg-final { scan-assembler {\sldp\tq16, q17} } } */ +/* { dg-final { scan-assembler {\sldp\tq18, q19} } } */ +/* { dg-final { scan-assembler {\sldp\tq20, q21} } } */ +/* { dg-final { scan-assembler {\sldp\tq22, q23} } } */