Yury Khrustalev <yury.khrusta...@arm.com> writes:
> From: Szabolcs Nagy <szabolcs.n...@arm.com>
>
> Nonlocal stack save and restore has to also save and restore the GCS
> pointer.  This is used in __builtin_setjmp/longjmp and nonlocal goto.
>
> The GCS specific code is only emitted if GCS branch-protection is
> enabled and the code always checks at runtime if GCS is enabled.
>
> The new -mbranch-protection=gcs and old -mbranch-protection=none code
> are ABI compatible: jmpbuf for __builtin_setjmp has space for 5
> pointers, the layout is
>
>   old layout: fp, pc, sp, unused, unused
>   new layout: fp, pc, sp, gcsp, unused
>
> Note: the ILP32 code generation is wrong as it saves the pointers with
> Pmode (i.e. 8 bytes per pointer), but the user supplied buffer size is
> for 5 pointers (4 bytes per pointer), this is not fixed.
>
> The nonlocal goto has no ABI compatibility issues as the goto and its
> destination are in the same translation unit.
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64.h (STACK_SAVEAREA_MODE): Make space for gcs.
> 	* config/aarch64/aarch64.md (save_stack_nonlocal): New.
> 	(restore_stack_nonlocal): New.
> ---
>  gcc/config/aarch64/aarch64.h  |  7 +++
>  gcc/config/aarch64/aarch64.md | 82 +++++++++++++++++++++++++++++++++++
>  2 files changed, 89 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 593319fd472..43a92e85780 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -1297,6 +1297,13 @@ typedef struct
>  #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
>    ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
>
> +/* Have space for both SP and GCSPR in the NONLOCAL case in
> +   emit_stack_save as well as in __builtin_setjmp, __builtin_longjmp
> +   and __builtin_nonlocal_goto.
> +   Note: On ILP32 the documented buf size is not enough PR84150.  */
> +#define STACK_SAVEAREA_MODE(LEVEL) \
> +  ((LEVEL) == SAVE_NONLOCAL ? TImode : Pmode)

It might be better to use CDImode, so that we don't claim 16-byte
alignment for -mstrict-align.
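That is, something like (untested sketch):

  #define STACK_SAVEAREA_MODE(LEVEL) \
    ((LEVEL) == SAVE_NONLOCAL ? CDImode : Pmode)

CDImode has the same 16-byte size as TImode but only the 8-byte
alignment of its DImode components, so -mstrict-align code would not
assume a 16-byte-aligned save area.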
> +
>  #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM)
>
>  #define RETURN_ADDR_RTX aarch64_return_addr
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index e4e11e35b5b..6e1646387d8 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1200,6 +1200,88 @@ (define_insn "*cb<optab><mode>1"
>  		      (const_int 1)))]
>  )
>
> +(define_expand "save_stack_nonlocal"
> +  [(set (match_operand 0 "memory_operand")
> +	(match_operand 1 "register_operand"))]
> +  ""
> +{
> +  rtx stack_slot = adjust_address (operands[0], Pmode, 0);
> +  emit_move_insn (stack_slot, operands[1]);
> +
> +  if (aarch64_gcs_enabled ())
> +    {
> +      /* Save GCS with code like
> +		mov	x16, 1
> +		chkfeat	x16
> +		tbnz	x16, 0, .L_done
> +		mrs	tmp, gcspr_el0
> +		str	tmp, [%0, 8]
> +	.L_done:  */
> +
> +      rtx done_label = gen_label_rtx ();
> +      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
> +      emit_move_insn (r16, const1_rtx);
> +      emit_insn (gen_aarch64_chkfeat ());
> +      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
> +      rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
> +      rtx gcs = force_reg (Pmode, const0_rtx);

The code seems to use force_reg (Pmode, const0_rtx) to get a fresh
register, but that should be done using gen_reg_rtx (Pmode) instead.
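For reference, the change would look something like this (presumably
the same applies to gcs_old and gcs_now in restore_stack_nonlocal
below):

-      rtx gcs = force_reg (Pmode, const0_rtx);
+      rtx gcs = gen_reg_rtx (Pmode);

gen_reg_rtx creates the fresh pseudo directly, whereas force_reg also
emits a dead move of zero into it.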
Looks good otherwise.  In particular, it avoids one mistake I made in
the past, in that it uses the generic optabs to generate branches, and
so should work with -mtrack-speculation.  (It would be good to have a
test of nonlocal goto and -mtrack-speculation though, if the later
patches don't have one already -- see the rough sketch at the end of
this message.)

Thanks,
Richard

> +      emit_insn (gen_aarch64_load_gcspr (gcs));
> +      emit_move_insn (gcs_slot, gcs);
> +      emit_label (done_label);
> +    }
> +  DONE;
> +})
> +
> +(define_expand "restore_stack_nonlocal"
> +  [(set (match_operand 0 "register_operand" "")
> +	(match_operand 1 "memory_operand" ""))]
> +  ""
> +{
> +  rtx stack_slot = adjust_address (operands[1], Pmode, 0);
> +  emit_move_insn (operands[0], stack_slot);
> +
> +  if (aarch64_gcs_enabled ())
> +    {
> +      /* Restore GCS with code like
> +		mov	x16, 1
> +		chkfeat	x16
> +		tbnz	x16, 0, .L_done
> +		ldr	tmp1, [%1, 8]
> +		mrs	tmp2, gcspr_el0
> +		subs	tmp2, tmp1, tmp2
> +		b.eq	.L_done
> +	.L_loop:
> +		gcspopm
> +		subs	tmp2, tmp2, 8
> +		b.ne	.L_loop
> +	.L_done:  */
> +
> +      rtx loop_label = gen_label_rtx ();
> +      rtx done_label = gen_label_rtx ();
> +      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
> +      emit_move_insn (r16, const1_rtx);
> +      emit_insn (gen_aarch64_chkfeat ());
> +      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
> +      rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
> +      rtx gcs_old = force_reg (Pmode, const0_rtx);
> +      emit_move_insn (gcs_old, gcs_slot);
> +      rtx gcs_now = force_reg (Pmode, const0_rtx);
> +      emit_insn (gen_aarch64_load_gcspr (gcs_now));
> +      emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now));
> +      rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
> +      rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx);
> +      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label));
> +      emit_label (loop_label);
> +      emit_insn (gen_aarch64_gcspopm_xzr ());
> +      emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, GEN_INT (-8)));
> +      cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
> +      cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx);
> +      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label));
> +      emit_label (done_label);
> +    }
> +  DONE;
> +})
> +
>  ;; -------------------------------------------------------------------
>  ;; Subroutine calls and sibcalls
>  ;; -------------------------------------------------------------------
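P.S. For the -mtrack-speculation test, I was thinking of something
along these lines (a rough, untested sketch; the function names are
illustrative only, and a scan for speculation-tracking code would
still need to be added):

/* { dg-do compile } */
/* { dg-options "-O2 -mbranch-protection=gcs -mtrack-speculation" } */

void run (void (*) (void));

int
test_nonlocal_goto (void)
{
  __label__ failed;

  void callback (void)
  {
    /* Nonlocal goto back into the enclosing function.  */
    goto failed;
  }

  run (callback);
  return 1;

 failed:
  return 0;
}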