Prior to this patch, when asked for a stepped vector such as:
{ 0, 0, 5, 5, 10, 10, ...}
the expander would instead emit (the step was always NPATTERNS):
{ 0, 0, 2, 2,  4,  4, ...}

This patch sets the step size to the requested value.

gcc/ChangeLog:

        * config/riscv/riscv-v.cc (expand_const_vector): Fix STEP size in
        expander.

Signed-off-by: Patrick O'Neill <patr...@rivosinc.com>
---
Detected with the existing testsuite after patch 8/9 is applied:
FAIL: gcc.dg/torture/vshuf-v16qi.c   -O2  execution test
FAIL: gcc.dg/torture/vshuf-v8hi.c   -O2  execution test
FAIL: gcc.dg/torture/vshuf-v8qi.c   -O2  execution test
---
 gcc/config/riscv/riscv-v.cc | 48 ++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c89603669e3..a3039a2cb19 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1312,25 +1312,61 @@ expand_const_vector (rtx target, rtx src)
              /* Generate the variable-length vector following this rule:
                 { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
                   E.g. { 0, 0, 8, 8, 16, 16, ... } */
-             /* We want to create a pattern where value[ix] = floor (ix /
+
+             /* We want to create a pattern where value[idx] = floor (idx /
                 NPATTERNS). As NPATTERNS is always a power of two we can
-                rewrite this as = ix & -NPATTERNS.  */
+                rewrite this as = idx & -NPATTERNS.  */
              /* Step 2: VID AND -NPATTERNS:
                 { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... }
              */
              rtx imm
                = gen_int_mode (-builder.npatterns (), builder.inner_mode ());
-             rtx tmp = gen_reg_rtx (builder.mode ());
-             rtx and_ops[] = {tmp, vid, imm};
+             rtx tmp1 = gen_reg_rtx (builder.mode ());
+             rtx and_ops[] = {tmp1, vid, imm};
              icode = code_for_pred_scalar (AND, builder.mode ());
              emit_vlmax_insn (icode, BINARY_OP, and_ops);
+
+             /* Step 3: Convert to step size 1.  */
+             rtx tmp2 = gen_reg_rtx (builder.mode ());
+             /* log2 (npatterns) to get the shift amount to convert
+                Eg.  { 0, 0, 0, 0, 4, 4, ... }
+                into { 0, 0, 0, 0, 1, 1, ... }.  */
+             HOST_WIDE_INT shift_amt = exact_log2 (builder.npatterns ()) ;
+             rtx shift = gen_int_mode (shift_amt, builder.inner_mode ());
+             rtx shift_ops[] = {tmp2, tmp1, shift};
+             icode = code_for_pred_scalar (ASHIFTRT, builder.mode ());
+             emit_vlmax_insn (icode, BINARY_OP, shift_ops);
+
+             /* Step 4: Multiply to step size n.  */
+             HOST_WIDE_INT step_size =
+               INTVAL (builder.elt (builder.npatterns ()))
+               - INTVAL (builder.elt (0));
+             rtx tmp3 = gen_reg_rtx (builder.mode ());
+             if (pow2p_hwi (step_size))
+               {
+                 /* Power of 2 can be handled with a left shift.  */
+                 HOST_WIDE_INT shift = exact_log2 (step_size);
+                 rtx shift_amount = gen_int_mode (shift, Pmode);
+                 insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+                 rtx ops[] = {tmp3, tmp2, shift_amount};
+                 emit_vlmax_insn (icode, BINARY_OP, ops);
+               }
+             else
+               {
+                 rtx mult_amt = gen_int_mode (step_size, builder.inner_mode ());
+                 insn_code icode = code_for_pred_scalar (MULT, builder.mode ());
+                 rtx ops[] = {tmp3, tmp2, mult_amt};
+                 emit_vlmax_insn (icode, BINARY_OP, ops);
+               }
+
+             /* Step 5: Add starting value to all elements.  */
              HOST_WIDE_INT init_val = INTVAL (builder.elt (0));
              if (init_val == 0)
-               emit_move_insn (target, tmp);
+               emit_move_insn (target, tmp3);
              else
                {
                  rtx dup = gen_const_vector_dup (builder.mode (), init_val);
-                 rtx add_ops[] = {target, tmp, dup};
+                 rtx add_ops[] = {target, tmp3, dup};
                  icode = code_for_pred (PLUS, builder.mode ());
                  emit_vlmax_insn (icode, BINARY_OP, add_ops);
                }
--
2.34.1

Reply via email to