We only support tail calls between functions that have the same PSTATE.ZA setting, i.e. from a "private-ZA" function to another "private-ZA" function, or from a "shared-ZA" function to another "shared-ZA" function.
Only a normal non-streaming function can tail-call another non-streaming function, and only a streaming function can tail-call another streaming function. Any function can tail-call a streaming-compatible function. gcc/ * config/aarch64/aarch64.cc (aarch64_function_ok_for_sibcall): Enforce PSTATE.SM and PSTATE.ZA restrictions. (aarch64_expand_epilogue): Save and restore the arguments to a sibcall around any change to PSTATE.SM. gcc/testsuite/ * gcc.target/aarch64/sme/locally_streaming_4.c: New test. * gcc.target/aarch64/sme/sibcall_1.c: Likewise. * gcc.target/aarch64/sme/sibcall_2.c: Likewise. * gcc.target/aarch64/sme/sibcall_3.c: Likewise. * gcc.target/aarch64/sme/sibcall_4.c: Likewise. * gcc.target/aarch64/sme/sibcall_5.c: Likewise. * gcc.target/aarch64/sme/sibcall_6.c: Likewise. * gcc.target/aarch64/sme/sibcall_7.c: Likewise. * gcc.target/aarch64/sme/sibcall_8.c: Likewise. --- gcc/config/aarch64/aarch64.cc | 9 +- .../aarch64/sme/locally_streaming_4.c | 129 ++++++++++++++++++ .../gcc.target/aarch64/sme/sibcall_1.c | 45 ++++++ .../gcc.target/aarch64/sme/sibcall_2.c | 45 ++++++ .../gcc.target/aarch64/sme/sibcall_3.c | 45 ++++++ .../gcc.target/aarch64/sme/sibcall_4.c | 45 ++++++ .../gcc.target/aarch64/sme/sibcall_5.c | 45 ++++++ .../gcc.target/aarch64/sme/sibcall_6.c | 26 ++++ .../gcc.target/aarch64/sme/sibcall_7.c | 26 ++++ .../gcc.target/aarch64/sme/sibcall_8.c | 19 +++ 10 files changed, 433 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c 
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 9a4a469a078..0d4c20f5c6a 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -8110,6 +8110,11 @@ aarch64_function_ok_for_sibcall (tree, tree exp) if (crtl->abi->id () != expr_callee_abi (exp).id ()) return false; + tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); + if (aarch64_fntype_sm_state (fntype) & ~aarch64_cfun_incoming_sm_state ()) + return false; + if (aarch64_fntype_za_state (fntype) != aarch64_cfun_incoming_za_state ()) + return false; return true; } @@ -11236,7 +11241,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, aarch64_isa_flags); aarch64_sme_mode_switch_regs args_switch; - if (crtl->return_rtx && REG_P (crtl->return_rtx)) + if (sibcall) + args_switch.add_call_args (sibcall); + else if (crtl->return_rtx && REG_P (crtl->return_rtx)) args_switch.add_reg (GET_MODE (crtl->return_rtx), REGNO (crtl->return_rtx)); args_switch.emit_prologue (); diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c new file mode 100644 index 00000000000..b0e4759ed11 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c @@ -0,0 +1,129 @@ +// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <arm_neon.h> +#include <arm_sve.h> + +/* +** test_d0: +** ... +** fmov x10, d0 +** smstop sm +** fmov d0, x10 +** ... +*/ +void consume_d0 (double d0); + +void __attribute__((arm_locally_streaming)) +test_d0 () +{ + consume_d0 (1.0); +} + +/* +** test_d7: +** ... 
+** fmov x10, d0 +** fmov x11, d1 +** fmov x12, d2 +** fmov x13, d3 +** fmov x14, d4 +** fmov x15, d5 +** fmov x16, d6 +** fmov x17, d7 +** smstop sm +** fmov d0, x10 +** fmov d1, x11 +** fmov d2, x12 +** fmov d3, x13 +** fmov d4, x14 +** fmov d5, x15 +** fmov d6, x16 +** fmov d7, x17 +** ... +*/ +void consume_d7 (double d0, double d1, double d2, double d3, + double d4, double d5, double d6, double d7); +void __attribute__((arm_locally_streaming)) +test_d7 () +{ + consume_d7 (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); +} + +/* +** test_q7: +** ... +** stp q0, q1, \[sp, #?-128\]! +** stp q2, q3, \[sp, #?32\] +** stp q4, q5, \[sp, #?64\] +** stp q6, q7, \[sp, #?96\] +** smstop sm +** ldp q2, q3, \[sp, #?32\] +** ldp q4, q5, \[sp, #?64\] +** ldp q6, q7, \[sp, #?96\] +** ldp q0, q1, \[sp\], #?128 +** ... +*/ +void consume_q7 (int8x16x4_t q0, int8x16x4_t q4); + +void __attribute__((arm_locally_streaming)) +test_q7 (int8x16x4_t *ptr) +{ + consume_q7 (ptr[0], ptr[1]); +} + +/* +** test_z7: +** ... +** addvl sp, sp, #-8 +** str z0, \[sp\] +** str z1, \[sp, #1, mul vl\] +** str z2, \[sp, #2, mul vl\] +** str z3, \[sp, #3, mul vl\] +** str z4, \[sp, #4, mul vl\] +** str z5, \[sp, #5, mul vl\] +** str z6, \[sp, #6, mul vl\] +** str z7, \[sp, #7, mul vl\] +** smstop sm +** ldr z0, \[sp\] +** ldr z1, \[sp, #1, mul vl\] +** ldr z2, \[sp, #2, mul vl\] +** ldr z3, \[sp, #3, mul vl\] +** ldr z4, \[sp, #4, mul vl\] +** ldr z5, \[sp, #5, mul vl\] +** ldr z6, \[sp, #6, mul vl\] +** ldr z7, \[sp, #7, mul vl\] +** addvl sp, sp, #8 +** ... +*/ +void consume_z7 (svint8x4_t z0, svint8x4_t z4); + +void __attribute__((arm_locally_streaming)) +test_z7 (svint8x4_t *ptr1, svint8x4_t *ptr2) +{ + consume_z7 (*ptr1, *ptr2); +} + +/* +** test_p3: +** ... 
+** addvl sp, sp, #-1 +** str p0, \[sp\] +** str p1, \[sp, #1, mul vl\] +** str p2, \[sp, #2, mul vl\] +** str p3, \[sp, #3, mul vl\] +** smstop sm +** ldr p0, \[sp\] +** ldr p1, \[sp, #1, mul vl\] +** ldr p2, \[sp, #2, mul vl\] +** ldr p3, \[sp, #3, mul vl\] +** addvl sp, sp, #1 +** ... +*/ +void consume_p3 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3); + +void __attribute__((arm_locally_streaming)) +test_p3 (svbool_t *ptr1, svbool_t *ptr2, svbool_t *ptr3, svbool_t *ptr4) +{ + consume_p3 (*ptr1, *ptr2, *ptr3, *ptr4); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c new file mode 100644 index 00000000000..0b0f4191a60 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c @@ -0,0 +1,45 @@ +/* { dg-options "-O2" } */ + +void __attribute__((arm_streaming_compatible)) sc_callee (); +void __attribute__((arm_streaming)) s_callee (); +void n_callee (); + +void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming)) +sc_ls_callee () {} +void __attribute__((noipa, arm_locally_streaming)) +n_ls_callee () {} + +void __attribute__((arm_streaming_compatible)) +sc_to_sc () +{ + sc_callee (); +} +/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ + +void __attribute__((arm_streaming_compatible)) +sc_to_s () +{ + s_callee (); +} +/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ + +void __attribute__((arm_streaming_compatible)) +sc_to_n () +{ + n_callee (); +} +/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ + +void __attribute__((arm_streaming_compatible)) +sc_to_sc_ls () +{ + sc_ls_callee (); +} +/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ + +void __attribute__((arm_streaming_compatible)) +sc_to_n_ls () +{ + n_ls_callee (); +} +/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c new file mode 100644 index 00000000000..95af22dd29d 
--- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c @@ -0,0 +1,45 @@ +/* { dg-options "-O2" } */ + +void __attribute__((arm_streaming_compatible)) sc_callee (); +void __attribute__((arm_streaming)) s_callee (); +void n_callee (); + +void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming)) +sc_ls_callee () {} +void __attribute__((noipa, arm_locally_streaming)) +n_ls_callee () {} + +void __attribute__((arm_streaming)) +s_to_sc () +{ + sc_callee (); +} +/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ + +void __attribute__((arm_streaming)) +s_to_s () +{ + s_callee (); +} +/* { dg-final { scan-assembler {\tb\ts_callee} } } */ + +void __attribute__((arm_streaming)) +s_to_n () +{ + n_callee (); +} +/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ + +void __attribute__((arm_streaming)) +s_to_sc_ls () +{ + sc_ls_callee (); +} +/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ + +void __attribute__((arm_streaming)) +s_to_n_ls () +{ + n_ls_callee (); +} +/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c new file mode 100644 index 00000000000..5221f925567 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c @@ -0,0 +1,45 @@ +/* { dg-options "-O2" } */ + +void __attribute__((arm_streaming_compatible)) sc_callee (); +void __attribute__((arm_streaming)) s_callee (); +void n_callee (); + +void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming)) +sc_ls_callee () {} +void __attribute__((noipa, arm_locally_streaming)) +n_ls_callee () {} + +void +n_to_sc () +{ + sc_callee (); +} +/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ + +void +n_to_s () +{ + s_callee (); +} +/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ + +void +n_to_n () +{ + n_callee (); +} +/* { dg-final { scan-assembler {\tb\tn_callee} } } */ + +void +n_to_sc_ls () +{ + sc_ls_callee (); +} +/* { 
dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ + +void +n_to_n_ls () +{ + n_ls_callee (); +} +/* { dg-final { scan-assembler {\tb\tn_ls_callee} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c new file mode 100644 index 00000000000..21b6a66a1b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c @@ -0,0 +1,45 @@ +/* { dg-options "-O2" } */ + +void __attribute__((arm_streaming_compatible)) sc_callee (); +void __attribute__((arm_streaming)) s_callee (); +void n_callee (); + +void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming)) +sc_ls_callee () {} +void __attribute__((noipa, arm_locally_streaming)) +n_ls_callee () {} + +void __attribute__((arm_streaming_compatible, arm_locally_streaming)) +sc_to_sc () +{ + sc_callee (); +} +/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ + +void __attribute__((arm_streaming_compatible, arm_locally_streaming)) +sc_to_s () +{ + s_callee (); +} +/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ + +void __attribute__((arm_streaming_compatible, arm_locally_streaming)) +sc_to_n () +{ + n_callee (); +} +/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ + +void __attribute__((arm_streaming_compatible, arm_locally_streaming)) +sc_to_sc_ls () +{ + sc_ls_callee (); +} +/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ + +void __attribute__((arm_streaming_compatible, arm_locally_streaming)) +sc_to_n_ls () +{ + n_ls_callee (); +} +/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c new file mode 100644 index 00000000000..736797a476c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c @@ -0,0 +1,45 @@ +/* { dg-options "-O2" } */ + +void __attribute__((arm_streaming_compatible)) sc_callee (); +void __attribute__((arm_streaming)) s_callee (); +void n_callee (); + +void 
__attribute__((noipa, arm_streaming_compatible, arm_locally_streaming)) +sc_ls_callee () {} +void __attribute__((noipa, arm_locally_streaming)) +n_ls_callee () {} + +void __attribute__((arm_locally_streaming)) +n_to_sc () +{ + sc_callee (); +} +/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ + +void __attribute__((arm_locally_streaming)) +n_to_s () +{ + s_callee (); +} +/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ + +void __attribute__((arm_locally_streaming)) +n_to_n () +{ + n_callee (); +} +/* { dg-final { scan-assembler {\tb\tn_callee} } } */ + +void __attribute__((arm_locally_streaming)) +n_to_sc_ls () +{ + sc_ls_callee (); +} +/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ + +void __attribute__((arm_locally_streaming)) +n_to_n_ls () +{ + n_ls_callee (); +} +/* { dg-final { scan-assembler {\tb\tn_ls_callee} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c new file mode 100644 index 00000000000..b2f321b7c8f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c @@ -0,0 +1,26 @@ +/* { dg-options "-O2" } */ + +void __attribute__((arm_shared_za)) shared_callee (); +void __attribute__((noipa, arm_new_za)) new_callee () {} +void normal_callee (); + +void __attribute__((arm_shared_za)) +shared_to_shared () +{ + shared_callee (); +} +/* { dg-final { scan-assembler {\tb\tshared_callee} } } */ + +void __attribute__((arm_shared_za)) +shared_to_new () +{ + new_callee (); +} +/* { dg-final { scan-assembler {\tbl\tnew_callee} } } */ + +void __attribute__((arm_shared_za)) +shared_to_normal () +{ + normal_callee (); +} +/* { dg-final { scan-assembler {\tbl\tnormal_callee} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c new file mode 100644 index 00000000000..a096cf591b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c @@ -0,0 +1,26 @@ +/* { dg-options "-O2" } */ + +void 
__attribute__((arm_shared_za)) shared_callee (); +void __attribute__((noipa, arm_new_za)) new_callee () {} +void normal_callee (); + +void __attribute__((arm_new_za)) +new_to_shared () +{ + shared_callee (); +} +/* { dg-final { scan-assembler {\tbl\tshared_callee} } } */ + +void __attribute__((arm_new_za)) +new_to_new () +{ + new_callee (); +} +/* { dg-final { scan-assembler {\tb\tnew_callee} } } */ + +void __attribute__((arm_new_za)) +new_to_normal () +{ + normal_callee (); +} +/* { dg-final { scan-assembler {\tb\tnormal_callee} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c new file mode 100644 index 00000000000..2553c10045a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c @@ -0,0 +1,19 @@ +/* { dg-options "-O2" } */ + +void __attribute__((arm_shared_za)) shared_callee (); +void __attribute__((noipa, arm_new_za)) new_callee () {} +void normal_callee (); + +void +normal_to_new () +{ + new_callee (); +} +/* { dg-final { scan-assembler {\tb\tnew_callee} } } */ + +void +normal_to_normal () +{ + normal_callee (); +} +/* { dg-final { scan-assembler {\tb\tnormal_callee} } } */ -- 2.25.1