Szabolcs Nagy <szabolcs.n...@arm.com> writes:
> The expected way to handle eh_return is to pass the stack adjustment
> offset and landing pad address via
>
>   EH_RETURN_STACKADJ_RTX
>   EH_RETURN_HANDLER_RTX
>
> to the epilogue that is shared between normal return paths and the
> eh_return paths.  EH_RETURN_HANDLER_RTX is the stack slot of the
> return address that is overwritten with the landing pad in the
> eh_return case and EH_RETURN_STACKADJ_RTX is a register added to sp
> right before return and it is set to 0 in the normal return case.
>
> The issue with this design is that eh_return and normal return may
> require different return sequence but there is no way to distinguish
> the two cases in the epilogue (the stack adjustment may be 0 in the
> eh_return case too).
>
> The reason eh_return and normal return requires different return
> sequence is that control flow integrity hardening may need to treat
> eh_return as a forward-edge transfer (it is not returning to the
> previous stack frame) and normal return as a backward-edge one.
> In case of AArch64 forward-edge is protected by BTI and requires br
> instruction and backward-edge is protected by PAUTH or GCS and
> requires ret (or authenticated ret) instruction.
>
> This patch resolves the issue by introducing EH_RETURN_TAKEN_RTX that
> is a flag set to 1 in the eh_return path and 0 in normal return paths.
> Branching on the EH_RETURN_TAKEN_RTX flag, the right return sequence
> can be used in the epilogue.
>
> The handler could be passed the old way via clobbering the return
> address, but since now the eh_return case can be distinguished, the
> handler can be in a different register than x30 and no stack frame
> is needed for eh_return.
>
> This patch fixes a return to anywhere gadget in the unwinder with
> existing standard branch protection as well as makes EH return
> compatible with the Guarded Control Stack (GCS) extension.
>
> Some tests are adjusted because eh_return no longer prevents pac-ret
> in the normal return path.
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-protos.h (aarch64_eh_return_handler_rtx):
>       Remove.
>       * config/aarch64/aarch64.cc (aarch64_return_address_signing_enabled):
>       Sign return address even in functions with eh_return.
>       (aarch64_expand_epilogue): Conditionally return with br or ret.
>       (aarch64_eh_return_handler_rtx): Remove.
>       * config/aarch64/aarch64.h (EH_RETURN_TAKEN_RTX): Define.
>       (EH_RETURN_STACKADJ_RTX): Change to R5.
>       (EH_RETURN_HANDLER_RTX): Change to R6.
>       * df-scan.cc: Handle EH_RETURN_TAKEN_RTX.
>       * doc/tm.texi: Regenerate.
>       * doc/tm.texi.in: Document EH_RETURN_TAKEN_RTX.
>       * except.cc (expand_eh_return): Handle EH_RETURN_TAKEN_RTX.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/return_address_sign_1.c: Move func4 to ...
>       * gcc.target/aarch64/return_address_sign_2.c: ... here and fix the
>       scan asm check.
>       * gcc.target/aarch64/return_address_sign_b_1.c: Move func4 to ...
>       * gcc.target/aarch64/return_address_sign_b_2.c: ... here and fix the
>       scan asm check.

OK, thanks!

Richard

> ---
> v2:
> - Introduce EH_RETURN_TAKEN_RTX instead of abusing EH_RETURN_STACKADJ_RTX.
> - Merge test fixes.
> ---
>  gcc/config/aarch64/aarch64-protos.h           |  1 -
>  gcc/config/aarch64/aarch64.cc                 | 88 ++++++-------------
>  gcc/config/aarch64/aarch64.h                  |  9 +-
>  gcc/df-scan.cc                                | 10 +++
>  gcc/doc/tm.texi                               | 12 +++
>  gcc/doc/tm.texi.in                            | 12 +++
>  gcc/except.cc                                 | 20 +++++
>  .../aarch64/return_address_sign_1.c           | 13 +--
>  .../aarch64/return_address_sign_2.c           | 17 +++-
>  .../aarch64/return_address_sign_b_1.c         | 11 ---
>  .../aarch64/return_address_sign_b_2.c         | 17 +++-
>  11 files changed, 116 insertions(+), 94 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 60a55f4bc19..80296024f04 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -859,7 +859,6 @@ machine_mode aarch64_hard_regno_caller_save_mode 
> (unsigned, unsigned,
>                                                      machine_mode);
>  int aarch64_uxt_size (int, HOST_WIDE_INT);
>  int aarch64_vec_fpconst_pow_of_2 (rtx);
> -rtx aarch64_eh_return_handler_rtx (void);
>  rtx aarch64_mask_from_zextract_ops (rtx, rtx);
>  const char *aarch64_output_move_struct (rtx *operands);
>  rtx aarch64_return_addr_rtx (void);
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index a28b66acf6a..5cdb33dd3dc 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -9113,17 +9113,6 @@ aarch64_return_address_signing_enabled (void)
>    /* This function should only be called after frame laid out.   */
>    gcc_assert (cfun->machine->frame.laid_out);
>  
> -  /* Turn return address signing off in any function that uses
> -     __builtin_eh_return.  The address passed to __builtin_eh_return
> -     is not signed so either it has to be signed (with original sp)
> -     or the code path that uses it has to avoid authenticating it.
> -     Currently eh return introduces a return to anywhere gadget, no
> -     matter what we do here since it uses ret with user provided
> -     address. An ideal fix for that is to use indirect branch which
> -     can be protected with BTI j (to some extent).  */
> -  if (crtl->calls_eh_return)
> -    return false;
> -
>    /* If signing scope is AARCH_FUNCTION_NON_LEAF, we only sign a leaf 
> function
>       if its LR is pushed onto stack.  */
>    return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
> @@ -10060,11 +10049,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, 
> rtx temp2,
>  /* Return 1 if the register is used by the epilogue.  We need to say the
>     return register is used, but only after epilogue generation is complete.
>     Note that in the case of sibcalls, the values "used by the epilogue" are
> -   considered live at the start of the called function.
> -
> -   For SIMD functions we need to return 1 for FP registers that are saved and
> -   restored by a function but are not zero in call_used_regs.  If we do not 
> do 
> -   this optimizations may remove the restore of the register.  */
> +   considered live at the start of the called function.  */
>  
>  int
>  aarch64_epilogue_uses (int regno)
> @@ -10468,6 +10453,30 @@ aarch64_expand_epilogue (bool for_sibcall)
>        RTX_FRAME_RELATED_P (insn) = 1;
>      }
>  
> +  /* Stack adjustment for exception handler.  */
> +  if (crtl->calls_eh_return && !for_sibcall)
> +    {
> +      /* If the EH_RETURN_TAKEN_RTX flag is set then we need
> +      to unwind the stack and jump to the handler, otherwise
> +      skip this eh_return logic and continue with normal
> +      return after the label.  We have already reset the CFA
> +      to be SP; letting the CFA move during this adjustment
> +      is just as correct as retaining the CFA from the body
> +      of the function.  Therefore, do nothing special.  */
> +      rtx label = gen_label_rtx ();
> +      rtx x = gen_rtx_EQ (VOIDmode, EH_RETURN_TAKEN_RTX, const0_rtx);
> +      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +                             gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
> +      rtx jump = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
> +      JUMP_LABEL (jump) = label;
> +      LABEL_NUSES (label)++;
> +      emit_insn (gen_add2_insn (stack_pointer_rtx,
> +                             EH_RETURN_STACKADJ_RTX));
> +      emit_jump_insn (gen_indirect_jump (EH_RETURN_HANDLER_RTX));
> +      emit_barrier ();
> +      emit_label (label);
> +    }
> +
>    /* We prefer to emit the combined return/authenticate instruction RETAA,
>       however there are three cases in which we must instead emit an explicit
>       authentication instruction.
> @@ -10497,58 +10506,11 @@ aarch64_expand_epilogue (bool for_sibcall)
>        RTX_FRAME_RELATED_P (insn) = 1;
>      }
>  
> -  /* Stack adjustment for exception handler.  */
> -  if (crtl->calls_eh_return && !for_sibcall)
> -    {
> -      /* We need to unwind the stack by the offset computed by
> -      EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
> -      to be SP; letting the CFA move during this adjustment
> -      is just as correct as retaining the CFA from the body
> -      of the function.  Therefore, do nothing special.  */
> -      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
> -    }
> -
>    emit_use (gen_rtx_REG (DImode, LR_REGNUM));
>    if (!for_sibcall)
>      emit_jump_insn (ret_rtx);
>  }
>  
> -/* Implement EH_RETURN_HANDLER_RTX.  EH returns need to either return
> -   normally or return to a previous frame after unwinding.
> -
> -   An EH return uses a single shared return sequence.  The epilogue is
> -   exactly like a normal epilogue except that it has an extra input
> -   register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
> -   that must be applied after the frame has been destroyed.  An extra label
> -   is inserted before the epilogue which initializes this register to zero,
> -   and this is the entry point for a normal return.
> -
> -   An actual EH return updates the return address, initializes the stack
> -   adjustment and jumps directly into the epilogue (bypassing the zeroing
> -   of the adjustment).  Since the return address is typically saved on the
> -   stack when a function makes a call, the saved LR must be updated outside
> -   the epilogue.
> -
> -   This poses problems as the store is generated well before the epilogue,
> -   so the offset of LR is not known yet.  Also optimizations will remove the
> -   store as it appears dead, even after the epilogue is generated (as the
> -   base or offset for loading LR is different in many cases).
> -
> -   To avoid these problems this implementation forces the frame pointer
> -   in eh_return functions so that the location of LR is fixed and known 
> early.
> -   It also marks the store volatile, so no optimization is permitted to
> -   remove the store.  */
> -rtx
> -aarch64_eh_return_handler_rtx (void)
> -{
> -  rtx tmp = gen_frame_mem (Pmode,
> -    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
> -
> -  /* Mark the store volatile, so no optimization is permitted to remove it.  
> */
> -  MEM_VOLATILE_P (tmp) = true;
> -  return tmp;
> -}
> -
>  /* Output code to add DELTA to the first argument, and then jump
>     to FUNCTION.  Used for C++ multiple inheritance.  */
>  static void
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 2f0777a37ac..58f7eeb565f 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -583,9 +583,12 @@ enum class aarch64_feature : unsigned char {
>  /* Output assembly strings after .cfi_startproc is emitted.  */
>  #define ASM_POST_CFI_STARTPROC  aarch64_post_cfi_startproc
>  
> -/* For EH returns X4 contains the stack adjustment.  */
> -#define EH_RETURN_STACKADJ_RTX       gen_rtx_REG (Pmode, R4_REGNUM)
> -#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
> +/* For EH returns X4 is a flag that is set in the EH return
> +   code paths and then X5 and X6 contain the stack adjustment
> +   and return address respectively.  */
> +#define EH_RETURN_TAKEN_RTX  gen_rtx_REG (Pmode, R4_REGNUM)
> +#define EH_RETURN_STACKADJ_RTX       gen_rtx_REG (Pmode, R5_REGNUM)
> +#define EH_RETURN_HANDLER_RTX        gen_rtx_REG (Pmode, R6_REGNUM)
>  
>  #undef TARGET_COMPUTE_FRAME_LAYOUT
>  #define TARGET_COMPUTE_FRAME_LAYOUT aarch64_layout_frame
> diff --git a/gcc/df-scan.cc b/gcc/df-scan.cc
> index 9515740728c..934c9ca2d81 100644
> --- a/gcc/df-scan.cc
> +++ b/gcc/df-scan.cc
> @@ -3720,6 +3720,16 @@ df_get_exit_block_use_set (bitmap exit_block_uses)
>      }
>  #endif
>  
> +#ifdef EH_RETURN_TAKEN_RTX
> +  if ((!targetm.have_epilogue () || ! epilogue_completed)
> +      && crtl->calls_eh_return)
> +    {
> +      rtx tmp = EH_RETURN_TAKEN_RTX;
> +      if (tmp && REG_P (tmp))
> +     df_mark_reg (tmp, exit_block_uses);
> +    }
> +#endif
> +
>    if ((!targetm.have_epilogue () || ! epilogue_completed)
>        && crtl->calls_eh_return)
>      {
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index f7ac806ff15..21bfe160a8b 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -3506,6 +3506,18 @@ If you want to support call frame exception handling, 
> you must
>  define either this macro or the @code{eh_return} instruction pattern.
>  @end defmac
>  
> +@defmac EH_RETURN_TAKEN_RTX
> +A C expression whose value is RTL representing a location in which
> +to store if the EH return path was taken instead of a normal return.
> +This macro allows conditionally executing different code in the
> +epilogue for the EH and normal return cases.
> +
> +When this macro is defined, the macros @code{EH_RETURN_STACKADJ_RTX}
> +and @code{EH_RETURN_HANDLER_RTX} are only meaningful in the epilogue
> +when 1 is stored to the specified location. The value 0 means normal
> +return.
> +@end defmac
> +
>  @defmac RETURN_ADDR_OFFSET
>  If defined, an integer-valued C expression for which rtl will be generated
>  to add it to the exception handler address before it is searched in the
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 141027e0bb4..ee9dc5c5fc3 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -2742,6 +2742,18 @@ If you want to support call frame exception handling, 
> you must
>  define either this macro or the @code{eh_return} instruction pattern.
>  @end defmac
>  
> +@defmac EH_RETURN_TAKEN_RTX
> +A C expression whose value is RTL representing a location in which
> +to store if the EH return path was taken instead of a normal return.
> +This macro allows conditionally executing different code in the
> +epilogue for the EH and normal return cases.
> +
> +When this macro is defined, the macros @code{EH_RETURN_STACKADJ_RTX}
> +and @code{EH_RETURN_HANDLER_RTX} are only meaningful in the epilogue
> +when 1 is stored to the specified location. The value 0 means normal
> +return.
> +@end defmac
> +
>  @defmac RETURN_ADDR_OFFSET
>  If defined, an integer-valued C expression for which rtl will be generated
>  to add it to the exception handler address before it is searched in the
> diff --git a/gcc/except.cc b/gcc/except.cc
> index e728aa43b6a..508f8bb3e26 100644
> --- a/gcc/except.cc
> +++ b/gcc/except.cc
> @@ -2281,6 +2281,10 @@ expand_eh_return (void)
>    emit_move_insn (EH_RETURN_STACKADJ_RTX, const0_rtx);
>  #endif
>  
> +#ifdef EH_RETURN_TAKEN_RTX
> +  emit_move_insn (EH_RETURN_TAKEN_RTX, const0_rtx);
> +#endif
> +
>    around_label = gen_label_rtx ();
>    emit_jump (around_label);
>  
> @@ -2291,6 +2295,10 @@ expand_eh_return (void)
>    emit_move_insn (EH_RETURN_STACKADJ_RTX, crtl->eh.ehr_stackadj);
>  #endif
>  
> +#ifdef EH_RETURN_TAKEN_RTX
> +  emit_move_insn (EH_RETURN_TAKEN_RTX, const1_rtx);
> +#endif
> +
>    if (targetm.have_eh_return ())
>      emit_insn (targetm.gen_eh_return (crtl->eh.ehr_handler));
>    else
> @@ -2301,7 +2309,19 @@ expand_eh_return (void)
>       error ("%<__builtin_eh_return%> not supported on this target");
>      }
>  
> +#ifdef EH_RETURN_TAKEN_RTX
> +  rtx_code_label *eh_done_label = gen_label_rtx ();
> +  emit_jump (eh_done_label);
> +#endif
> +
>    emit_label (around_label);
> +
> +#ifdef EH_RETURN_TAKEN_RTX
> +  for (rtx tmp : { EH_RETURN_STACKADJ_RTX, EH_RETURN_HANDLER_RTX })
> +    if (tmp && REG_P (tmp))
> +      emit_clobber (tmp);
> +  emit_label (eh_done_label);
> +#endif
>  }
>  
>  /* Convert a ptr_mode address ADDR_TREE to a Pmode address controlled by
> diff --git a/gcc/testsuite/gcc.target/aarch64/return_address_sign_1.c 
> b/gcc/testsuite/gcc.target/aarch64/return_address_sign_1.c
> index 232ba67ade0..114a9dacb3f 100644
> --- a/gcc/testsuite/gcc.target/aarch64/return_address_sign_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/return_address_sign_1.c
> @@ -37,16 +37,5 @@ func3 (int a, int b, int c)
>    /* autiasp */
>  }
>  
> -/* eh_return.  */
> -void __attribute__ ((target ("arch=armv8.3-a")))
> -func4 (long offset, void *handler, int *ptr, int imm1, int imm2)
> -{
> -  /* no paciasp */
> -  *ptr = imm1 + foo (imm1) + imm2;
> -  __builtin_eh_return (offset, handler);
> -  /* no autiasp */
> -  return;
> -}
> -
> -/* { dg-final { scan-assembler-times "autiasp" 3 } } */
>  /* { dg-final { scan-assembler-times "paciasp" 3 } } */
> +/* { dg-final { scan-assembler-times "autiasp" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/return_address_sign_2.c 
> b/gcc/testsuite/gcc.target/aarch64/return_address_sign_2.c
> index a4bc5b45333..d93492c3c43 100644
> --- a/gcc/testsuite/gcc.target/aarch64/return_address_sign_2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/return_address_sign_2.c
> @@ -14,5 +14,18 @@ func1 (int a, int b, int c)
>    /* retaa */
>  }
>  
> -/* { dg-final { scan-assembler-times "paciasp" 1 } } */
> -/* { dg-final { scan-assembler-times "retaa" 1 } } */
> +/* eh_return.  */
> +void __attribute__ ((target ("arch=armv8.3-a")))
> +func4 (long offset, void *handler, int *ptr, int imm1, int imm2)
> +{
> +  /* paciasp */
> +  *ptr = imm1 + foo (imm1) + imm2;
> +  if (handler)
> +    /* br */
> +    __builtin_eh_return (offset, handler);
> +  /* retaa */
> +  return;
> +}
> +
> +/* { dg-final { scan-assembler-times "paciasp" 2 } } */
> +/* { dg-final { scan-assembler-times "retaa" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_1.c 
> b/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_1.c
> index 43e32ab6cb7..697fa30dc5a 100644
> --- a/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_1.c
> @@ -37,16 +37,5 @@ func3 (int a, int b, int c)
>    /* autibsp */
>  }
>  
> -/* eh_return.  */
> -void __attribute__ ((target ("arch=armv8.3-a")))
> -func4 (long offset, void *handler, int *ptr, int imm1, int imm2)
> -{
> -  /* no pacibsp */
> -  *ptr = imm1 + foo (imm1) + imm2;
> -  __builtin_eh_return (offset, handler);
> -  /* no autibsp */
> -  return;
> -}
> -
>  /* { dg-final { scan-assembler-times "pacibsp" 3 } } */
>  /* { dg-final { scan-assembler-times "autibsp" 3 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_2.c 
> b/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_2.c
> index 9ed64ce0591..452240b731e 100644
> --- a/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/return_address_sign_b_2.c
> @@ -14,5 +14,18 @@ func1 (int a, int b, int c)
>    /* retab */
>  }
>  
> -/* { dg-final { scan-assembler-times "pacibsp" 1 } } */
> -/* { dg-final { scan-assembler-times "retab" 1 } } */
> +/* eh_return.  */
> +void __attribute__ ((target ("arch=armv8.3-a")))
> +func4 (long offset, void *handler, int *ptr, int imm1, int imm2)
> +{
> +  /* pacibsp */
> +  *ptr = imm1 + foo (imm1) + imm2;
> +  if (handler)
> +    /* br */
> +    __builtin_eh_return (offset, handler);
> +  /* retab */
> +  return;
> +}
> +
> +/* { dg-final { scan-assembler-times "pacibsp" 2 } } */
> +/* { dg-final { scan-assembler-times "retab" 2 } } */

Reply via email to