See [1] thread for original patch which spawned this one.

We are currently seeing the following code where we perform a vsetvl
before a branching instruction against the avl.

        vsetvli a5,a1,e32,m1,tu,ma
        vle32.v v2,0(a0)
        sub     a1,a1,a5 <-- a1 potentially set to 0
        sh2add  a0,a5,a0
        vfmacc.vv       v1,v2,v2
        vsetvli a5,a1,e32,m1,tu,ma <-- incompatible vinfo. update vl to 0
        beq     a1,zero,.L12 <-- check if avl is 0

Since we are branching off of the avl, we don't need to update vl until
after the branch is taken. Search the ready queue for vsetvls scheduled
before branching instructions that branch off of the same regno and
promote the branches to execute first. This can improve performance by
potentially avoiding setting VL=0, which may be expensive on some uarches.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675622.html

        PR/117974

gcc/ChangeLog:

        * config/riscv/riscv.cc (vsetvl_avl_regno): New helper function.
        (insn_increases_zeroness_p): Ditto.
        (riscv_promote_ready): Ditto.
        (riscv_sched_reorder): Implement hook.
        (TARGET_SCHED_REORDER): Define Hook.
        * config/riscv/riscv.opt: New flag.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/vsetvl/pr117974.c: New test.

Signed-off-by: Edwin Lu <e...@rivosinc.com>
Co-authored-by: Palmer Dabbelt <pal...@rivosinc.com>
---
 gcc/config/riscv/riscv.cc                     | 103 ++++++++++++++++++
 gcc/config/riscv/riscv.opt                    |   4 +
 .../gcc.target/riscv/rvv/vsetvl/pr117974.c    |  15 +++
 3 files changed, 122 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 89aa25d5da9..cf0866fa3fb 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -14035,6 +14035,106 @@ bool need_shadow_stack_push_pop_p ()
   return is_zicfiss_p () && riscv_save_return_addr_reg_p ();
 }

+/* Return the register number holding the AVL of INSN when INSN is a
+   vsetvl-type instruction with a register AVL operand, otherwise -1.  */
+static int
+vsetvl_avl_regno (rtx_insn *insn)
+{
+  if (recog_memoized (insn) < 0)
+    return -1;
+
+  if (get_attr_type (insn) != TYPE_VSETVL
+      && get_attr_type (insn) != TYPE_VSETVL_PRE)
+    return -1;
+
+  /* From vector.md, the vsetvl operands are: [0] VL, [1] AVL, [2] SEW,
+     [3] LMUL, [4]/[5] tail/mask policy, 0 or 1 (undisturbed/agnostic).  */
+  extract_insn (insn);
+  rtx avl = recog_data.operand[1];
+
+  /* Nothing above guarantees the AVL is a REG (it may be an immediate),
+     and REGNO is only valid on a REG.  */
+  return REG_P (avl) ? (int) REGNO (avl) : -1;
+}
+
+/* Return true if INSN is a recognized jump whose operands include both
+   register REGNO and a constant zero, i.e. it looks like a branch that
+   compares REGNO against zero.  */
+static bool
+insn_increases_zeroness_p (rtx_insn *insn, int regno)
+{
+  if (!JUMP_P (insn) || recog_memoized (insn) < 0)
+    return false;
+
+  extract_insn (insn);
+  bool match_reg = false;
+  bool comp_zero = false;
+  for (int i = 0; i < recog_data.n_operands; i++)
+    {
+      rtx op = recog_data.operand[i];
+      if (REG_P (op) && REGNO (op) == (unsigned int) regno)
+        match_reg = true;
+      /* A CONST_INT holds one HOST_WIDE_INT; read it with INTVAL.  */
+      if (CONST_INT_P (op) && INTVAL (op) == 0)
+        comp_zero = true;
+    }
+  return match_reg && comp_zero;
+}
+
+/* Move the instruction at index LOWER of ready queue READY up to index
+   HIGHER, shifting the entries in between down one slot.  LOWER must
+   be <= HIGHER.  Same approach as the MIPS port's helper.  */
+static void
+riscv_promote_ready (rtx_insn **ready, int lower, int higher)
+{
+  rtx_insn *promoted = ready[lower];
+  int slot = lower;
+
+  /* Close the gap left at LOWER, then drop the saved insn in at HIGHER.  */
+  for (; slot < higher; slot++)
+    ready[slot] = ready[slot + 1];
+  ready[slot] = promoted;
+}
+
+/* Implement TARGET_SCHED_REORDER.  When READY holds a branch on zero of a
+   vsetvl's AVL register at a lower index than that vsetvl, move the branch
+   up to the vsetvl's index, as setting VL=0 dynamically can be slow.  */
+static int
+riscv_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+                     rtx_insn **ready, int *nreadyp, int cycle ATTRIBUTE_UNUSED)
+{
+  if (!TARGET_AVOID_VL_EQ_0)
+    return riscv_issue_rate ();
+
+  /* Nothing sits below index 0, so the scan can stop at index 1.  */
+  for (int i = *nreadyp - 1; i > 0; i--)
+    {
+      /* Find the vsetvl.  */
+      int avl_regno = vsetvl_avl_regno (ready[i]);
+      if (avl_regno == -1)
+        continue;
+      for (int j = i - 1; j >= 0; j--)
+        {
+          if (recog_memoized (ready[j]) < 0)
+            continue;
+          /* Exit if another vsetvl (of either type) is found before
+             finding a branch insn in the ready queue.  */
+          if (get_attr_type (ready[j]) == TYPE_VSETVL
+              || get_attr_type (ready[j]) == TYPE_VSETVL_PRE)
+            break;
+          /* Right now the only zeroness-increasing pattern we recognize
+             is a branch against zero, so stop at the first match.  */
+          if (insn_increases_zeroness_p (ready[j], avl_regno))
+            {
+              riscv_promote_ready (ready, j, i);
+              break;
+            }
+        }
+    }
+
+  return riscv_issue_rate ();
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -14430,6 +14530,9 @@ bool need_shadow_stack_push_pop_p ()
 #undef TARGET_DOCUMENTATION_NAME
 #define TARGET_DOCUMENTATION_NAME "RISC-V"

+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER riscv_sched_reorder
+
 struct gcc_target targetm = TARGET_INITIALIZER;

 #include "gt-riscv.h"
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 7515c8ea13d..c6cab61fdc0 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -681,3 +681,7 @@ Specifies whether the fence.tso instruction should be used.
 mautovec-segment
 Target Integer Var(riscv_mautovec_segment) Init(1)
 Enable (default) or disable generation of vector segment load/store instructions.
+
+mavoid-vl0
+Target Var(TARGET_AVOID_VL_EQ_0) Init(1)
+Avoid (default) code that dynamically sets VL=0 where possible.
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c
new file mode 100644
index 00000000000..275922eb0bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr117974.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl -Ofast" } */
+
+float g(float q[], int N){  /* Return the sum of squares of q[0..N-1].  */
+    float dqnorm = 0.0;
+    /* Request 4x unrolling of the reduction loop that follows.  */
+    #pragma GCC unroll 4
+
+    for (int i=0; i < N; i++) {
+        dqnorm = dqnorm + q[i] * q[i];
+    }
+    return dqnorm;
+}
+
+/* { dg-final { scan-assembler-times {beq\s+[a-x0-9]+,zero,.L12\s+vsetvli} 3 } } */
--
2.43.0

Reply via email to