[PATCH] RISC-V: Fix vec_series expander[PR110985]

Juzhe-Zhong Fri, 11 Aug 2023 01:45:53 -0700

This patch fix bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110985


        PR target/110985

gcc/ChangeLog:

        * config/riscv/riscv-v.cc (expand_vec_series): Refactor the expander.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c: New test.

---
 gcc/config/riscv/riscv-v.cc                   | 74 +++++++--------
 .../riscv/rvv/autovec/vls-vlmax/pr110985.c    | 90 +++++++++++++++++++
 2 files changed, 129 insertions(+), 35 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a3062c90618..5f9b296c92e 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1309,6 +1309,7 @@ expand_vec_series (rtx dest, rtx base, rtx step)
   machine_mode mode = GET_MODE (dest);
   poly_int64 nunits_m1 = GET_MODE_NUNITS (mode) - 1;
   poly_int64 value;
+  rtx result = register_operand (dest, mode) ? dest : gen_reg_rtx (mode);
 
   /* VECT_IV = BASE + I * STEP.  */
 
@@ -1317,15 +1318,10 @@ expand_vec_series (rtx dest, rtx base, rtx step)
   rtx op[] = {vid};
   emit_vlmax_insn (code_for_pred_series (mode), RVV_MISC_OP, op);
 
-  /* Step 2: Generate I * STEP.
-     - STEP is 1, we don't emit any instructions.
-     - STEP is power of 2, we use vsll.vi/vsll.vx.
-     - STEP is non-power of 2, we use vmul.vx.  */
   rtx step_adj;
-  if (rtx_equal_p (step, const1_rtx))
-    step_adj = vid;
-  else if (rtx_equal_p (step, constm1_rtx) && poly_int_rtx_p (base, &value)
-          && known_eq (nunits_m1, value))
+  if (rtx_equal_p (step, constm1_rtx)
+      && poly_int_rtx_p (base, &value)
+      && known_eq (nunits_m1, value))
     {
       /* Special case:
           {nunits - 1, nunits - 2, ... , 0}.
@@ -1334,46 +1330,54 @@ expand_vec_series (rtx dest, rtx base, rtx step)
         Code sequence:
           vid.v v
           vrsub nunits - 1, v.  */
-      rtx ops[] = {dest, vid, gen_int_mode (nunits_m1, GET_MODE_INNER (mode))};
+      rtx ops[]
+       = {result, vid, gen_int_mode (nunits_m1, GET_MODE_INNER (mode))};
       insn_code icode = code_for_pred_sub_reverse_scalar (mode);
       emit_vlmax_insn (icode, RVV_BINOP, ops);
-      return;
     }
   else
     {
-      step_adj = gen_reg_rtx (mode);
-      if (CONST_INT_P (step) && pow2p_hwi (INTVAL (step)))
+      /* Step 2: Generate I * STEP.
+        - STEP is 1, we don't emit any instructions.
+        - STEP is power of 2, we use vsll.vi/vsll.vx.
+        - STEP is non-power of 2, we use vmul.vx.  */
+      if (rtx_equal_p (step, const1_rtx))
+       step_adj = vid;
+      else
        {
-         /* Emit logical left shift operation.  */
-         int shift = exact_log2 (INTVAL (step));
-         rtx shift_amount = gen_int_mode (shift, Pmode);
-         insn_code icode = code_for_pred_scalar (ASHIFT, mode);
-         rtx ops[] = {step_adj, vid, shift_amount};
-         emit_vlmax_insn (icode, RVV_BINOP, ops);
+         step_adj = gen_reg_rtx (mode);
+         if (CONST_INT_P (step) && pow2p_hwi (INTVAL (step)))
+           {
+             /* Emit logical left shift operation.  */
+             int shift = exact_log2 (INTVAL (step));
+             rtx shift_amount = gen_int_mode (shift, Pmode);
+             insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+             rtx ops[] = {step_adj, vid, shift_amount};
+             emit_vlmax_insn (icode, RVV_BINOP, ops);
+           }
+         else
+           {
+             insn_code icode = code_for_pred_scalar (MULT, mode);
+             rtx ops[] = {step_adj, vid, step};
+             emit_vlmax_insn (icode, RVV_BINOP, ops);
+           }
        }
+
+      /* Step 3: Generate BASE + I * STEP.
+         - BASE is 0, use result of vid.
+         - BASE is not 0, we use vadd.vx/vadd.vi.  */
+      if (rtx_equal_p (base, const0_rtx))
+       emit_move_insn (result, step_adj);
       else
        {
-         insn_code icode = code_for_pred_scalar (MULT, mode);
-         rtx ops[] = {step_adj, vid, step};
+         insn_code icode = code_for_pred_scalar (PLUS, mode);
+         rtx ops[] = {result, step_adj, base};
          emit_vlmax_insn (icode, RVV_BINOP, ops);
        }
     }
 
-  /* Step 3: Generate BASE + I * STEP.
-     - BASE is 0, use result of vid.
-     - BASE is not 0, we use vadd.vx/vadd.vi.  */
-  if (rtx_equal_p (base, const0_rtx))
-    {
-      emit_move_insn (dest, step_adj);
-    }
-  else
-    {
-      rtx result = gen_reg_rtx (mode);
-      insn_code icode = code_for_pred_scalar (PLUS, mode);
-      rtx ops[] = {result, step_adj, base};
-      emit_vlmax_insn (icode, RVV_BINOP, ops);
-      emit_move_insn (dest, result);
-    }
+  if (result != dest)
+    emit_move_insn (dest, result);
 }
 
 static void
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c
new file mode 100644
index 00000000000..7710654c1bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/pr110985.c
@@ -0,0 +1,90 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 
--param=riscv-autovec-preference=fixed-vlmax -fno-schedule-insns 
-fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <stdint-gcc.h>
+
+typedef int16_t vnx16i __attribute__ ((vector_size (32)));
+
+/*
+** foo1:
+**   vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**   vid\.v\s+v[0-9]+
+**   vrsub\.vi\s+v[0-9]+,\s*v[0-9]+,\s*15
+**   vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+**   ret
+*/
+void
+foo1 (int16_t *__restrict out)
+{
+  vnx16i v = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+  *(vnx16i *) out = v;
+}
+
+/*
+** foo2:
+**   vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**   vid\.v\s+v[0-9]+
+**   li\s+[a-x0-9]+,\s*7
+**   vmul\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+**   vadd\.vi\s+v[0-9]+,\s*v[0-9]+,\s*3
+**   vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+**   ret
+*/
+void
+foo2 (int16_t *__restrict out)
+{
+  vnx16i v
+    = {3,         3 + 7 * 1,  3 + 7 * 2,  3 + 7 * 3, 3 + 7 * 4,  3 + 7 * 5,
+       3 + 7 * 6,  3 + 7 * 7,  3 + 7 * 8,  3 + 7 * 9, 3 + 7 * 10, 3 + 7 * 11,
+       3 + 7 * 12, 3 + 7 * 13, 3 + 7 * 14, 3 + 7 * 15};
+  *(vnx16i *) out = v;
+}
+
+/*
+** foo3:
+**   vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**   vid\.v\s+v[0-9]+
+**   vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+**   ret
+*/
+void
+foo3 (int16_t *__restrict out)
+{
+  vnx16i v
+    = {0, 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  *(vnx16i *) out = v;
+}
+
+/*
+** foo4:
+**   vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**   vid\.v\s+v[0-9]+
+**   li\s+[a-x0-9]+,\s*6
+**   vmul\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
+**   vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+**   ret
+*/
+void
+foo4 (int16_t *__restrict out)
+{
+  vnx16i v
+    = {0*6, 1*6,2*6,3*6,4*6,5*6,6*6,7*6,8*6,9*6,10*6,11*6,12*6,13*6,14*6,15*6};
+  *(vnx16i *) out = v;
+}
+
+/*
+** foo5:
+**   vsetivli\s+zero,\s*16,\s*e16,\s*m1,\s*t[au],\s*m[au]
+**   vid\.v\s+v[0-9]+
+**   vadd\.vi\s+v[0-9]+,\s*v[0-9]+,\s*-16
+**   vs1r\.v\s+v[0-9]+,\s*0\([a-x0-9]+\)
+**   ret
+*/
+void
+foo5 (int16_t *__restrict out)
+{
+  vnx16i v
+    = {0-16, 
1-16,2-16,3-16,4-16,5-16,6-16,7-16,8-16,9-16,10-16,11-16,12-16,13-16,14-16,15-16};
+  *(vnx16i *) out = v;
+}
-- 
2.36.1

[PATCH] RISC-V: Fix vec_series expander[PR110985]

Reply via email to