Adds basic support to vector stack-clash protection using a loop to do
the probing and stack adjustments.

gcc/ChangeLog:
        * config/riscv/riscv.cc
        (riscv_allocate_and_probe_stack_loop): New function.
        (riscv_v_adjust_scalable_frame): Add stack-clash protection
        support.
        (riscv_allocate_and_probe_stack_space): Move the probe loop
        implementation to riscv_allocate_and_probe_stack_loop.
        * config/riscv/riscv.h: Define RISCV_STACK_CLASH_VECTOR_CFA_REGNUM.

gcc/testsuite/ChangeLog:
        * gcc.target/riscv/stack-check-cfa-3.c: New test.
        * gcc.target/riscv/stack-check-prologue-16.c: New test.
        * gcc.target/riscv/struct_vect_24.c: New test.
---
 gcc/config/riscv/riscv.cc                     | 99 +++++++++++++++----
 gcc/config/riscv/riscv.h                      |  2 +
 .../gcc.target/riscv/stack-check-cfa-3.c      | 13 +++
 .../riscv/stack-check-prologue-16.c           | 30 ++++++
 .../gcc.target/riscv/struct_vect_24.c         | 47 +++++++++
 5 files changed, 170 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/stack-check-cfa-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/stack-check-prologue-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/struct_vect_24.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 292d190f319..69c0e07f4c5 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -7897,6 +7897,35 @@ static const code_for_push_pop_t 
code_for_push_pop[ZCMP_MAX_GRP_SLOTS][ZCMP_OP_N
       code_for_gpr_multi_popret_up_to_s11,
       code_for_gpr_multi_popretz_up_to_s11}};
 
+/*  Set a probe loop for stack clash protection.  */
+static void
+riscv_allocate_and_probe_stack_loop (rtx tmp, enum rtx_code code,
+                                    rtx op0, rtx op1, bool vector,
+                                    HOST_WIDE_INT offset)
+{
+  tmp = riscv_force_temporary (tmp, gen_int_mode (offset, Pmode));
+
+  /* Loop.  */
+  rtx label = gen_label_rtx ();
+  emit_label (label);
+
+  /* Allocate and probe stack.  */
+  emit_insn (gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, tmp));
+  emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+                   STACK_CLASH_CALLER_GUARD));
+  emit_insn (gen_blockage ());
+
+  /* Adjust the remaining vector length.  */
+  if (vector)
+    emit_insn (gen_sub3_insn (op0, op0, tmp));
+
+  /* Branch if there's still more bytes to probe.  */
+  riscv_expand_conditional_branch (label, code, op0, op1);
+  JUMP_LABEL (get_last_insn ()) = label;
+
+  emit_insn (gen_blockage ());
+}
+
 /* Adjust scalable frame of vector for prologue && epilogue. */
 
 static void
@@ -7909,6 +7938,49 @@ riscv_v_adjust_scalable_frame (rtx target, poly_int64 
offset, bool epilogue)
   riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
                              gen_int_mode (offset, Pmode));
 
+  /* If doing stack clash protection then we use a loop to allocate and probe
+     the stack.  */
+  if (flag_stack_clash_protection && !epilogue)
+    {
+      HOST_WIDE_INT min_probe_threshold
+       = (1 << param_stack_clash_protection_guard_size) - 
STACK_CLASH_CALLER_GUARD;
+
+      if (!frame_pointer_needed)
+       {
+         /* This is done to provide unwinding information for the stack
+            adjustments we're about to do, however to prevent the optimizers
+            from removing the S0 move and leaving the CFA note (which would be
+            very wrong) we tie the old and new stack pointer together.
+            The tie will expand to nothing but the optimizers will not touch
+            the instruction.  */
+         insn = get_last_insn ();
+         rtx stack_ptr_copy = gen_rtx_REG (Pmode, 
RISCV_STACK_CLASH_VECTOR_CFA_REGNUM);
+         emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
+         riscv_emit_stack_tie (stack_ptr_copy);
+
+         /* We want the CFA independent of the stack pointer for the
+            duration of the loop.  */
+         add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
+         RTX_FRAME_RELATED_P (insn) = 1;
+       }
+
+      riscv_allocate_and_probe_stack_loop (tmp, GE, adjust_size, tmp, true,
+                                          min_probe_threshold);
+
+      /* Allocate the residual.  */
+      insn = emit_insn (gen_sub3_insn (target, target, adjust_size));
+
+      /* Now reset the CFA register if needed.  */
+      if (!frame_pointer_needed)
+       {
+         add_reg_note (insn, REG_CFA_DEF_CFA,
+                       plus_constant (Pmode, stack_pointer_rtx, -offset));
+         RTX_FRAME_RELATED_P (insn) = 1;
+       }
+
+      return;
+    }
+
   if (epilogue)
     insn = gen_add3_insn (target, target, adjust_size);
   else
@@ -8056,8 +8128,9 @@ riscv_allocate_and_probe_stack_space (rtx temp1, 
HOST_WIDE_INT size)
   else
     {
       /* Compute the ending address.  */
-      temp1 = riscv_force_temporary (temp1, gen_int_mode (rounded_size, 
Pmode));
-      insn = emit_insn (gen_sub3_insn (temp1, stack_pointer_rtx, temp1));
+      rtx temp2 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP2_REGNUM);
+      temp2 = riscv_force_temporary (temp2, gen_int_mode (rounded_size, 
Pmode));
+      insn = emit_insn (gen_sub3_insn (temp2, stack_pointer_rtx, temp2));
 
       if (!frame_pointer_needed)
        {
@@ -8068,25 +8141,9 @@ riscv_allocate_and_probe_stack_space (rtx temp1, 
HOST_WIDE_INT size)
          RTX_FRAME_RELATED_P (insn) = 1;
        }
 
-      /* Allocate and probe the stack.  */
-
-      rtx temp2 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP2_REGNUM);
-      temp2 = riscv_force_temporary (temp2, gen_int_mode (guard_size, Pmode));
-
-      /* Loop.  */
-      rtx label = gen_label_rtx ();
-      emit_label (label);
-
-      emit_insn (gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, temp2));
-      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-                          guard_used_by_caller));
-      emit_insn (gen_blockage ());
-
-      /* Check if the stack pointer is at the ending address.  */
-      riscv_expand_conditional_branch (label, NE, stack_pointer_rtx, temp1);
-      JUMP_LABEL (get_last_insn ()) = label;
-
-      emit_insn (gen_blockage ());
+      /* This allocates and probes the stack.  */
+      riscv_allocate_and_probe_stack_loop (temp1, NE, stack_pointer_rtx, temp2,
+                                          false, guard_size);
 
       /* Now reset the CFA register if needed.  */
       if (!frame_pointer_needed)
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 9670c7df8f7..0432beb81e0 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -429,6 +429,8 @@ ASM_MISA_SPEC
 #define RISCV_PROLOGUE_TEMP2_REGNUM (GP_TEMP_FIRST + 1)
 #define RISCV_PROLOGUE_TEMP2(MODE) gen_rtx_REG (MODE, 
RISCV_PROLOGUE_TEMP2_REGNUM)
 
+#define RISCV_STACK_CLASH_VECTOR_CFA_REGNUM (GP_TEMP_FIRST + 4)
+
 #define RISCV_CALL_ADDRESS_TEMP_REGNUM (GP_TEMP_FIRST + 1)
 #define RISCV_CALL_ADDRESS_TEMP(MODE) \
   gen_rtx_REG (MODE, RISCV_CALL_ADDRESS_TEMP_REGNUM)
diff --git a/gcc/testsuite/gcc.target/riscv/stack-check-cfa-3.c 
b/gcc/testsuite/gcc.target/riscv/stack-check-cfa-3.c
new file mode 100644
index 00000000000..cc0d54ed7c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/stack-check-cfa-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fstack-clash-protection 
-funwind-tables -fno-stack-protector" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+
+#include "stack-check-prologue-16.c"
+
+/* Checks that the CFA notes are correct for every sp adjustment, but we also
+   need to make sure we can unwind correctly before the frame is set up.  So
+   check that we're emitting s0 with a copy of sp an setting the CFA there.  */
+
+/* { dg-final { scan-assembler-times {mv\ts1,sp} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa [0-9]+, 0} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_escape 
0xf,0xa,0x72,0,0x92,0xa2,0x38,0,0x9,0xec,0x1e,0x22} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/stack-check-prologue-16.c 
b/gcc/testsuite/gcc.target/riscv/stack-check-prologue-16.c
new file mode 100644
index 00000000000..c74dce04b23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/stack-check-prologue-16.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fstack-clash-protection" } */
+
+/* Invoke X (P##n) for n in [0, 7].  */
+#define REPEAT8(X, P) \
+  X (P##0) X (P##1) X (P##2) X (P##3) X (P##4) X (P##5) X (P##6) X (P##7)
+
+/* Invoke X (n) for all octal n in [0, 39].  */
+#define REPEAT40(X) \
+  REPEAT8 (X, 0) REPEAT8 (X, 1)  REPEAT8 (X, 2) REPEAT8 (X, 3) REPEAT8 (X, 4)
+
+/* Expect vector work to be done, with spilling of vector registers.  */
+void
+f2 (int x[40][100], int *y)
+{
+  /* Try to force some spilling.  */
+#define DECLARE(N) int y##N = y[N];
+  REPEAT40 (DECLARE);
+#pragma omp simd
+  for (int i = 0; i < 100; ++i)
+    {
+#define INC(N) x[N][i] += y##N;
+      REPEAT40 (INC);
+    }
+}
+
+/* Vector spill, requires probing as vector size is unknown at compile time.  
*/
+
+/* { dg-final { scan-assembler-times {sd\tzero,1024\(sp\)} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/struct_vect_24.c 
b/gcc/testsuite/gcc.target/riscv/struct_vect_24.c
new file mode 100644
index 00000000000..7c0852f1a55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/struct_vect_24.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fstack-clash-protection 
-fno-stack-protector" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-O2" "-Og" "-Os" "-Oz" 
"-funroll-loops"} } */
+
+#include <stdint.h>
+
+#define N 50
+#define S 2 * 4 * 1024
+
+/* Invoke X (P##n) for n in [0, 9].  */
+#define REPEAT8(X, P) \
+  X (P##0) X (P##1) X (P##2) X (P##3) X (P##4) X (P##5) X (P##6) X (P##7) \
+  X (P##8)  X (P##9)
+
+/* Invoke X (n) for all n in [0, 49].  */
+#define REPEAT50(X) \
+  REPEAT8 (X, ) REPEAT8 (X, 1)  REPEAT8 (X, 2) REPEAT8 (X, 3) REPEAT8 (X, 4)
+
+  /* Try to force some spilling.  */
+#define DECLARE(N) int src##N = src[N * 4];
+#define INC(N) dest[i] += src##N;
+
+#define TEST_LOOP(NAME, TYPE)                          \
+  void __attribute__ ((noinline))      \
+  NAME (TYPE *restrict dest, TYPE *restrict src)       \
+  {                                                    \
+    REPEAT50 (DECLARE);                                        \
+    volatile char foo[S];                              \
+    foo[S-1]=1;                                                \
+    for (int i = 0; i < N; i++)                                \
+      {                                                        \
+       REPEAT50 (INC);                                 \
+      }                                                        \
+  }
+
+#define TEST(NAME) \
+  TEST_LOOP (NAME##_i32, int32_t) \
+  TEST_LOOP (NAME##_i64, int64_t)
+
+TEST (test)
+
+/* Check the vectorized loop for stack clash probing.  */
+
+/* { dg-final { scan-assembler-times {sd\tzero,1024\(sp\)} 6 } } */
+/* { dg-final { scan-assembler-times {bge\tt1,t0,.[^\\r\\n]*} 2 } } */
+/* { dg-final { scan-assembler-times {sub\s+t1,t1,t0} 2 } } */
-- 
2.42.0

Reply via email to