Yury Khrustalev <yury.khrusta...@arm.com> writes:
> From: Szabolcs Nagy <szabolcs.n...@arm.com>
>
> Transaction begin and abort use setjmp/longjmp like operations that
> need to be updated for GCS compatibility. We use similar logic to
> libc setjmp/longjmp that support switching stack and thus switching
> GCS (e.g. due to longjmp out of a makecontext stack), this is kept
> even though it is likely not required for transaction aborts.
>
> The gtm_jmpbuf is internal to libitm so we can change its layout
> without breaking ABI.
>
> libitm/ChangeLog:
>
>       * config/aarch64/sjlj.S: Add GCS support and mark GCS compatible.
>       * config/aarch64/target.h: Add gcs field to gtm_jmpbuf.
> ---
>  libitm/config/aarch64/sjlj.S   | 60 ++++++++++++++++++++++++++++++++--
>  libitm/config/aarch64/target.h |  1 +
>  2 files changed, 58 insertions(+), 3 deletions(-)
>
> diff --git a/libitm/config/aarch64/sjlj.S b/libitm/config/aarch64/sjlj.S
> index aeffd4d1070..cf1d8af2c96 100644
> --- a/libitm/config/aarch64/sjlj.S
> +++ b/libitm/config/aarch64/sjlj.S
> @@ -29,6 +29,13 @@
>  #define AUTIASP      hint    29
>  #define PACIBSP      hint    27
>  #define AUTIBSP      hint    31
> +#define CHKFEAT_X16  hint    40
> +#define MRS_GCSPR(x) mrs     x, s3_3_c2_c5_1
> +#define GCSPOPM(x)   sysl    x, #3, c7, c7, #1
> +#define GCSSS1(x)    sys     #3, c7, c7, #2, x
> +#define GCSSS2(x)    sysl    x, #3, c7, c7, #3
> +
> +#define L(name) .L##name
>  
>  #if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__GCC_HAVE_DWARF2_CFI_ASM)
>  # define cfi_negate_ra_state .cfi_negate_ra_state
> @@ -80,7 +87,16 @@ _ITM_beginTransaction:
>       stp     d10, d11, [sp, 7*16]
>       stp     d12, d13, [sp, 8*16]
>       stp     d14, d15, [sp, 9*16]
> -     str     x1, [sp, 10*16]
> +
> +     /* GCS support.  */
> +     mov     x2, 0
> +     mov     x16, 1
> +     CHKFEAT_X16
> +     tbnz    x16, 0, L(gcs_done_sj)
> +     MRS_GCSPR (x2)
> +     add     x2, x2, 8 /* GCS after _ITM_beginTransaction returns.  */
> +L(gcs_done_sj):
> +     stp     x2, x1, [sp, 10*16]
>  
>       /* Invoke GTM_begin_transaction with the struct we just built.  */
>       mov     x1, sp
> @@ -117,7 +133,38 @@ GTM_longjmp:
>       ldp     d10, d11, [x1, 7*16]
>       ldp     d12, d13, [x1, 8*16]
>       ldp     d14, d15, [x1, 9*16]
> +
> +     /* GCS support.  */
> +     mov     x16, 1
> +     CHKFEAT_X16
> +     tbnz    x16, 0, L(gcs_done_lj)
> +     MRS_GCSPR (x7)
>       ldr     x3, [x1, 10*16]
> +     mov     x4, x3
> +     /* x7: GCSPR now.  x3, x4: target GCSPR.  x5, x6: tmp regs.  */
> +L(gcs_scan):
> +     cmp     x7, x4
> +     b.eq    L(gcs_pop)
> +     sub     x4, x4, 8
> +     /* Check for a cap token.  */
> +     ldr     x5, [x4]
> +     and     x6, x4, 0xfffffffffffff000
> +     orr     x6, x6, 1
> +     cmp     x5, x6
> +     b.ne    L(gcs_scan)
> +L(gcs_switch):
> +     add     x7, x4, 8
> +     GCSSS1 (x4)
> +     GCSSS2 (xzr)

Don't we still need to pop from the current stack up to the switch point,
in case something further up the call frame wants to switch back to it?

If so, don't we also need to handle multiple switches, and similarly
pop from intermediate stacks?

E.g. if we have stacks S1-S3 and functions f1-f4, and a call stack:

  f1 initially uses S1, switches to S2, calls f2
  f2 initially uses S2, switches to S3, calls f3
  f3 initially uses S3, switches to S1, calls f4
  f4 initially uses S1, triggers a longjmp to f2

then wouldn't the longjmp need unwind S1 to the switch point;
unwind S3 through f3's entry to the switch point; and then unwind
S2 in the way that the routine currently does?  Or is that kind of
situation not supported?

Thanks,
Richard

> +L(gcs_pop):
> +     cmp     x7, x3
> +     b.eq    L(gcs_done_lj)
> +     GCSPOPM (xzr)
> +     add     x7, x7, 8
> +     b       L(gcs_pop)
> +L(gcs_done_lj):
> +
> +     ldr     x3, [x1, 10*16 + 8]
>       ldp     x29, x30, [x1]
>       cfi_def_cfa(x1, 0)
>       CFI_PAC_TOGGLE
> @@ -132,6 +179,7 @@ GTM_longjmp:
>  #define FEATURE_1_AND 0xc0000000
>  #define FEATURE_1_BTI 1
>  #define FEATURE_1_PAC 2
> +#define FEATURE_1_GCS 4
>  
>  /* Supported features based on the code generation options.  */
>  #if defined(__ARM_FEATURE_BTI_DEFAULT)
> @@ -146,6 +194,12 @@ GTM_longjmp:
>  # define PAC_FLAG 0
>  #endif
>  
> +#if __ARM_FEATURE_GCS_DEFAULT
> +# define GCS_FLAG FEATURE_1_GCS
> +#else
> +# define GCS_FLAG 0
> +#endif
> +
>  /* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
>  #define GNU_PROPERTY(type, value)    \
>    .section .note.gnu.property, "a";  \
> @@ -163,7 +217,7 @@ GTM_longjmp:
>  .section .note.GNU-stack, "", %progbits
>  
>  /* Add GNU property note if built with branch protection.  */
> -# if (BTI_FLAG|PAC_FLAG) != 0
> -GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
> +# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
> +GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
>  # endif
>  #endif
> diff --git a/libitm/config/aarch64/target.h b/libitm/config/aarch64/target.h
> index 3d99197bfab..a1f39b4bf7a 100644
> --- a/libitm/config/aarch64/target.h
> +++ b/libitm/config/aarch64/target.h
> @@ -30,6 +30,7 @@ typedef struct gtm_jmpbuf
>    unsigned long long pc;     /* x30 */
>    unsigned long long gr[10]; /* x19-x28 */
>    unsigned long long vr[8];  /* d8-d15 */
> +  void *gcs;                 /* GCSPR_EL0 */
>    void *cfa;
>  } gtm_jmpbuf;

Reply via email to