On 06/10/15 11:11, Eric Botcazou wrote:
>> Thanks - I have no further comments on this patch. We probably need to
>> implement the same on AArch64 too in order to avoid similar problems.
> 
> Here's the implementation for aarch64, very similar but simpler since there is
> no shortage of scratch registers; the only thing to note is the new blockage 
> pattern.  This was tested on real hardware but not with Linux, instead with 
> Darwin (experimental port of the toolchain to iOS) and makes it possible to 
> pass ACATS (Ada conformance testsuite which requires stack checking).
> 
> There is also a couple of tweaks for the ARM implementation: a cosmetic one 
> for the probe_stack pattern and one for the output_probe_stack_range loop.
> 
> 
> 2015-10-06  Tristan Gingold  <ging...@adacore.com>
>             Eric Botcazou  <ebotca...@adacore.com>
> 
>         PR middle-end/65958
>       * config/aarch64/aarch64-protos.h (aarch64_output_probe_stack_range):
>       Declare.
>       * config/aarch64/aarch64.md: Declare UNSPECV_BLOCKAGE and
>       UNSPEC_PROBE_STACK_RANGE.
>       (blockage): New instruction.
>       (probe_stack_range): Likewise.
>       * config/aarch64/aarch64.c (aarch64_emit_probe_stack_range): New
>       function.
>       (aarch64_output_probe_stack_range): Likewise.
>       (aarch64_expand_prologue): Invoke aarch64_emit_probe_stack_range if
>       static builtin stack checking is enabled.
>       * config/aarch64/aarch64-linux.h (STACK_CHECK_STATIC_BUILTIN):
>       Define.
> 
>       * config/arm/arm.c (arm_emit_probe_stack_range): Adjust comment.
>       (output_probe_stack_range): Rotate the loop and simplify.
>       (thumb1_expand_prologue): Tweak sorry message.
>       * config/arm/arm.md (probe_stack): Use bare string.
> 
> 
> 2015-10-06  Eric Botcazou  <ebotca...@adacore.com>
> 
>         * gcc.target/aarch64/stack-checking.c: New test.
> 

Unless there really is common code between the two patches, this should
be separated out into two posts, one for ARM and one for AArch64.

More comments inline.

> 
> pr65958-2.diff
> 
> 
> Index: config/aarch64/aarch64-linux.h
> ===================================================================
> --- config/aarch64/aarch64-linux.h    (revision 228512)
> +++ config/aarch64/aarch64-linux.h    (working copy)
> @@ -88,4 +88,7 @@
>  #undef TARGET_BINDS_LOCAL_P
>  #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
>  
> +/* Define this to be nonzero if static stack checking is supported.  */
> +#define STACK_CHECK_STATIC_BUILTIN 1
> +
>  #endif  /* GCC_AARCH64_LINUX_H */
> Index: config/aarch64/aarch64-protos.h
> ===================================================================
> --- config/aarch64/aarch64-protos.h   (revision 228512)
> +++ config/aarch64/aarch64-protos.h   (working copy)
> @@ -316,6 +316,7 @@ void aarch64_asm_output_labelref (FILE *
>  void aarch64_cpu_cpp_builtins (cpp_reader *);
>  void aarch64_elf_asm_named_section (const char *, unsigned, tree);
>  const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
> +const char * aarch64_output_probe_stack_range (rtx, rtx);
>  void aarch64_err_no_fpadvsimd (machine_mode, const char *);
>  void aarch64_expand_epilogue (bool);
>  void aarch64_expand_mov_immediate (rtx, rtx);
> Index: config/aarch64/aarch64.c
> ===================================================================
> --- config/aarch64/aarch64.c  (revision 228512)
> +++ config/aarch64/aarch64.c  (working copy)
> @@ -76,6 +76,7 @@
>  #include "sched-int.h"
>  #include "cortex-a57-fma-steering.h"
>  #include "target-globals.h"
> +#include "common/common-target.h"
>  
>  /* This file should be included last.  */
>  #include "target-def.h"
> @@ -2144,6 +2145,167 @@ aarch64_libgcc_cmp_return_mode (void)
>    return SImode;
>  }
>  
> +#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
> +
> +#if (PROBE_INTERVAL % 4096) != 0
> +#error Cannot use indexed address calculation for stack probing
> +#endif
> +
> +#if PROBE_INTERVAL > 4096
> +#error Cannot use indexed addressing mode for stack probing
> +#endif
> +

Hmm, so if PROBE_INTERVAL != 4096 we barf!

While that's safe and probably right for Linux, on some OSes there might
be a minimum page size of 16k or even 64k.  It would be nice if we could
support that.

> +/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
> +   inclusive.  These are offsets from the current stack pointer.  */
> +
> +static void
> +aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
> +{
> +  rtx reg9 = gen_rtx_REG (Pmode, 9);

Ugh!  Manifest constants such as the register number 9 should be replaced
by named #defines, e.g. PROBE_STACK_BASE_REG.

> +
> +  /* The following code uses indexed address calculation on FIRST.  */
> +  gcc_assert ((first % 4096) == 0);

Where does 4096 come from?

> +
> +  /* See if we have a constant small number of probes to generate.  If so,
> +     that's the easy case.  */
> +  if (size <= PROBE_INTERVAL)
> +    {
> +      emit_set_insn (reg9,
> +                  plus_constant (Pmode, stack_pointer_rtx,
> +                                        -(first + PROBE_INTERVAL)));
> +      emit_stack_probe (plus_constant (Pmode, reg9, PROBE_INTERVAL - size));
> +    }
> +
> +  /* The run-time loop is made up of 8 insns in the generic case while the
> +     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
> +  else if (size <= 4 * PROBE_INTERVAL)
> +    {
> +      HOST_WIDE_INT i, rem;
> +
> +      emit_set_insn (reg9,
> +                  plus_constant (Pmode, stack_pointer_rtx,
> +                                        -(first + PROBE_INTERVAL)));
> +      emit_stack_probe (reg9);
> +
> +      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
> +      it exceeds SIZE.  If only two probes are needed, this will not
> +      generate any code.  Then probe at FIRST + SIZE.  */
> +      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
> +     {
> +       emit_set_insn (reg9, plus_constant (Pmode, reg9, -PROBE_INTERVAL));
> +       emit_stack_probe (reg9);
> +     }
> +
> +      rem = size - (i - PROBE_INTERVAL);
> +      if (rem > 256)
> +     {
> +       emit_set_insn (reg9, plus_constant (Pmode, reg9, -PROBE_INTERVAL));
> +       emit_stack_probe (plus_constant (Pmode, reg9, PROBE_INTERVAL - rem));
> +     }
> +      else
> +     emit_stack_probe (plus_constant (Pmode, reg9, -rem));
> +    }
> +
> +  /* Otherwise, do the same as above, but in a loop.  Note that we must be
> +     extra careful with variables wrapping around because we might be at
> +     the very top (or the very bottom) of the address space and we have
> +     to be able to handle this case properly; in particular, we use an
> +     equality test for the loop condition.  */
> +  else
> +    {
> +      HOST_WIDE_INT rounded_size;
> +      rtx reg10 = gen_rtx_REG (Pmode, 10);

More manifest constants.

> +
> +      /* Step 1: round SIZE to the previous multiple of the interval.  */
> +
> +      rounded_size = size & -PROBE_INTERVAL;
> +
> +
> +      /* Step 2: compute initial and final value of the loop counter.  */
> +
> +      /* TEST_ADDR = SP + FIRST.  */
> +      emit_set_insn (reg9,
> +                  plus_constant (Pmode, stack_pointer_rtx, -first));
> +
> +      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
> +      emit_set_insn (reg10,
> +                  plus_constant (Pmode, stack_pointer_rtx,
> +                                 -(first + rounded_size)));
> +
> +
> +      /* Step 3: the loop
> +
> +      do
> +        {
> +          TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
> +          probe at TEST_ADDR
> +        }
> +      while (TEST_ADDR != LAST_ADDR)
> +
> +      probes at FIRST + N * PROBE_INTERVAL for values of N from 1
> +      until it is equal to ROUNDED_SIZE.  */
> +
> +      emit_insn (gen_probe_stack_range (reg9, reg9, reg10));
> +
> +
> +      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
> +      that SIZE is equal to ROUNDED_SIZE.  */
> +
> +      if (size != rounded_size)
> +     {
> +       HOST_WIDE_INT rem = size - rounded_size;
> +
> +       if (rem > 256)
> +         {
> +           emit_set_insn (reg10,
> +                          plus_constant (Pmode, reg10, -PROBE_INTERVAL));
> +           emit_stack_probe (plus_constant (Pmode, reg10,
> +                                            PROBE_INTERVAL - rem));
> +         }
> +       else
> +         emit_stack_probe (plus_constant (Pmode, reg10, -rem));
> +     }
> +    }
> +
> +  /* Make sure nothing is scheduled before we are done.  */
> +  emit_insn (gen_blockage ());
> +}
> +
> +/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
> +   absolute addresses.  */
> +
> +const char *
> +aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
> +{
> +  static int labelno = 0;
> +  char loop_lab[32];
> +  rtx xops[2];
> +
> +  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
> +
> +  /* Loop.  */
> +  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
> +
> +  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
> +  xops[0] = reg1;
> +  xops[1] = GEN_INT (PROBE_INTERVAL);
> +  output_asm_insn ("sub\t%0, %0, %1", xops);
> +
> +  /* Probe at TEST_ADDR.  */
> +  output_asm_insn ("str\txzr, [%0]", xops);
> +
> +  /* Test if TEST_ADDR == LAST_ADDR.  */
> +  xops[1] = reg2;
> +  output_asm_insn ("cmp\t%0, %1", xops);
> +
> +  /* Branch.  */
> +  fputs ("\tb.ne\t", asm_out_file);
> +  assemble_name_raw (asm_out_file, loop_lab);
> +  fputc ('\n', asm_out_file);
> +
> +  return "";
> +}
> +
>  static bool
>  aarch64_frame_pointer_required (void)
>  {
> @@ -2544,6 +2706,18 @@ aarch64_expand_prologue (void)
>    if (flag_stack_usage_info)
>      current_function_static_stack_size = frame_size;
>  
> +  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
> +    {
> +      if (crtl->is_leaf && !cfun->calls_alloca)
> +     {
> +       if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
> +         aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
> +                                         frame_size - STACK_CHECK_PROTECT);
> +     }
> +      else if (frame_size > 0)
> +     aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
> +    }
> +
>    /* Store pairs and load pairs have a range only -512 to 504.  */
>    if (offset >= 512)
>      {
> Index: config/aarch64/aarch64.md
> ===================================================================
> --- config/aarch64/aarch64.md (revision 228512)
> +++ config/aarch64/aarch64.md (working copy)
> @@ -104,6 +104,7 @@ (define_c_enum "unspec" [
>      UNSPEC_MB
>      UNSPEC_NOP
>      UNSPEC_PRLG_STK
> +    UNSPEC_PROBE_STACK_RANGE
>      UNSPEC_RBIT
>      UNSPEC_SISD_NEG
>      UNSPEC_SISD_SSHL
> @@ -134,6 +135,7 @@ (define_c_enum "unspecv" [
>      UNSPECV_SET_FPCR         ; Represent assign of FPCR content.
>      UNSPECV_GET_FPSR         ; Represent fetch of FPSR content.
>      UNSPECV_SET_FPSR         ; Represent assign of FPSR content.
> +    UNSPECV_BLOCKAGE         ; Represent a blockage
>    ]
>  )
>  
> @@ -4807,6 +4809,27 @@ (define_insn "stack_tie"
>    [(set_attr "length" "0")]
>  )
>  
> +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
> +;; all of memory.  This blocks insns from being moved across this point.
> +
> +(define_insn "blockage"
> +  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
> +  ""
> +  ""
> +  [(set_attr "length" "0")
> +   (set_attr "type" "block")]
> +)
> +
> +(define_insn "probe_stack_range"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> +     (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")
> +                          (match_operand:DI 2 "register_operand" "r")]
> +                          UNSPEC_PROBE_STACK_RANGE))]
> +  ""
> +{
> +  return aarch64_output_probe_stack_range (operands[0], operands[2]);
> +})
> +

This insn pattern should be annotated with a "length" attribute giving the
length of the emitted sequence.

>  ;; Named pattern for expanding thread pointer reference.
>  (define_expand "get_thread_pointerdi"
>    [(match_operand:DI 0 "register_operand" "=r")]
> Index: config/arm/arm.c
> ===================================================================
> --- config/arm/arm.c  (revision 228512)
> +++ config/arm/arm.c  (working copy)
> @@ -21262,11 +21262,12 @@ arm_emit_probe_stack_range (HOST_WIDE_IN
>  
>        /* Step 3: the loop
>  
> -      while (TEST_ADDR != LAST_ADDR)
> +      do
>          {
>            TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
>            probe at TEST_ADDR
>          }
> +      while (TEST_ADDR != LAST_ADDR)
>  
>        probes at FIRST + N * PROBE_INTERVAL for values of N from 1
>        until it is equal to ROUNDED_SIZE.  */
> @@ -21311,22 +21312,22 @@ output_probe_stack_range (rtx reg1, rtx
>  
>    ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
>  
> +  /* Loop.  */
>    ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
>  
> -   /* Test if TEST_ADDR == LAST_ADDR.  */
> +  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
>    xops[0] = reg1;
> -  xops[1] = reg2;
> -  output_asm_insn ("cmp\t%0, %1", xops);
> +  xops[1] = GEN_INT (PROBE_INTERVAL);
> +  output_asm_insn ("sub\t%0, %0, %1", xops);
>  
> -  if (TARGET_THUMB2)
> -    fputs ("\tittt\tne\n", asm_out_file);
> +  /* Probe at TEST_ADDR.  */
> +  output_asm_insn ("str\tr0, [%0, #0]", xops);
>  
> -  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
> -  xops[1] = GEN_INT (PROBE_INTERVAL);
> -  output_asm_insn ("subne\t%0, %0, %1", xops);
> +  /* Test if TEST_ADDR == LAST_ADDR.  */
> +  xops[1] = reg2;
> +  output_asm_insn ("cmp\t%0, %1", xops);
>  
> -  /* Probe at TEST_ADDR and branch.  */
> -  output_asm_insn ("strne\tr0, [%0, #0]", xops);
> +  /* Branch.  */
>    fputs ("\tbne\t", asm_out_file);
>    assemble_name_raw (asm_out_file, loop_lab);
>    fputc ('\n', asm_out_file);
> @@ -24869,7 +24870,7 @@ thumb1_expand_prologue (void)
>  
>    /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
>    if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
> -    sorry ("-fstack-check=specific for THUMB1");
> +    sorry ("-fstack-check=specific for Thumb-1");
>  
>    amount = offsets->outgoing_args - offsets->saved_regs;
>    amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
> Index: config/arm/arm.md
> ===================================================================
> --- config/arm/arm.md (revision 228512)
> +++ config/arm/arm.md (working copy)
> @@ -8262,9 +8262,7 @@ (define_insn "probe_stack"
>    [(set (match_operand 0 "memory_operand" "=m")
>          (unspec [(const_int 0)] UNSPEC_PROBE_STACK))]
>    "TARGET_32BIT"
> -{
> -  return "str%?\\tr0, %0";
> -}
> +  "str%?\\tr0, %0"
>    [(set_attr "type" "store1")
>     (set_attr "predicable" "yes")]
>  )
> 
> 
> stack-checking.c
> 
> 
> /* { dg-do run { target { *-*-linux* } } } */
> /* { dg-options "-fstack-check" } */
> 
> int main(void)
> {
>   char *p;
>   if (1)
>     {
>       char i[48];
>       p = __builtin_alloca(8);
>       p[0] = 1;
>     }
> 
>   if (1)
>     {
>       char i[48], j[64];
>       j[48] = 0;
>     }
> 
>   return !p[0];
> }
> 

R.


Reply via email to