From: Pan Li <pan2...@intel.com>

This patch implements MASK_LEN_STRIDED_LOAD{STORE} in the RISC-V
backend by leveraging the vector strided load/store insns.

For example:
void foo (int * __restrict a, int * __restrict b, int stride, int n)
{
    for (int i = 0; i < n; i++)
      a[i*stride] = b[i*stride] + 100;
}

Before this patch:
  38   │     vsetvli a5,a3,e32,m1,ta,ma
  39   │     vluxei64.v  v1,(a1),v4
  40   │     mul a4,a2,a5
  41   │     sub a3,a3,a5
  42   │     vadd.vv v1,v1,v2
  43   │     vsuxei64.v  v1,(a0),v4
  44   │     add a1,a1,a4
  45   │     add a0,a0,a4

After this patch:
  33   │     vsetvli a5,a3,e32,m1,ta,ma
  34   │     vlse32.v    v1,0(a1),a2
  35   │     mul a4,a2,a5
  36   │     sub a3,a3,a5
  37   │     vadd.vv v1,v1,v2
  38   │     vsse32.v    v1,0(a0),a2
  39   │     add a1,a1,a4
  40   │     add a0,a0,a4

The following test suites pass with this patch:
* The full RISC-V regression test.

gcc/ChangeLog:

        * config/riscv/autovec.md (mask_len_strided_load_<mode>): Add
        new pattern for MASK_LEN_STRIDED_LOAD.
        (mask_len_strided_store_<mode>): Ditto but for store.
        * config/riscv/riscv-protos.h (expand_strided_load): Add new
        func decl to expand strided load.
        (expand_strided_store): Ditto but for store.
        * config/riscv/riscv-v.cc (expand_strided_load): Add new
        func impl to expand strided load.
        (expand_strided_store): Ditto but for store.

Signed-off-by: Pan Li <pan2...@intel.com>
Co-Authored-By: Juzhe-Zhong <juzhe.zh...@rivai.ai>
---
 gcc/config/riscv/autovec.md     | 29 ++++++++++++++++++
 gcc/config/riscv/riscv-protos.h |  2 ++
 gcc/config/riscv/riscv-v.cc     | 52 +++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a34f63c9651..85a915bd65f 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2855,3 +2855,32 @@ (define_expand "v<bitmanip_optab><mode>3"
     DONE;
   }
 )
+
+;; =========================================================================
+;; == Strided Load/Store
+;; =========================================================================
+(define_expand "mask_len_strided_load_<mode>"
+  [(match_operand:V     0 "register_operand")        ;; loaded vector
+   (match_operand       1 "pmode_reg_or_0_operand")  ;; base address
+   (match_operand       2 "pmode_reg_or_0_operand")  ;; stride
+   (match_operand:<VM>  3 "vector_mask_operand")     ;; mask
+   (match_operand       4 "autovec_length_operand")  ;; length
+   (match_operand       5 "const_0_operand")]        ;; constant 0, unused
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_strided_load (<MODE>mode, operands);
+    DONE;
+  })
+
+(define_expand "mask_len_strided_store_<mode>"
+  [(match_operand       0 "pmode_reg_or_0_operand")  ;; base address
+   (match_operand       1 "pmode_reg_or_0_operand")  ;; stride
+   (match_operand:V     2 "register_operand")        ;; vector to store
+   (match_operand:<VM>  3 "vector_mask_operand")     ;; mask
+   (match_operand       4 "autovec_length_operand")  ;; length
+   (match_operand       5 "const_0_operand")]        ;; constant 0, unused
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_strided_store (<MODE>mode, operands);
+    DONE;
+  })
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index d690162bb0c..47c9494ff2b 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -696,6 +696,8 @@ bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
 void emit_vec_extract (rtx, rtx, rtx);
 bool expand_vec_setmem (rtx, rtx, rtx);
 bool expand_vec_cmpmem (rtx, rtx, rtx, rtx);
+void expand_strided_load (machine_mode, rtx *);
+void expand_strided_store (machine_mode, rtx *);
 
 /* Rounding mode bitfield for fixed point VXRM.  */
 enum fixed_point_rounding_mode
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 630fbd80e94..ae028e8928a 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3833,6 +3833,58 @@ expand_load_store (rtx *ops, bool is_load)
     }
 }
 
+/* Expand MASK_LEN_STRIDED_LOAD for MODE; OPS are the pattern's operands.  */
+void
+expand_strided_load (machine_mode mode, rtx *ops)
+{
+  rtx v_reg = ops[0];  /* Destination vector register.  */
+  rtx base = ops[1];   /* Address used for the MEM operand.  */
+  rtx stride = ops[2];
+  rtx mask = ops[3];
+  rtx len = ops[4];    /* ops[5] is not read.  */
+  poly_int64 len_val;
+
+  insn_code icode = code_for_pred_strided_load (mode);
+  rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride};
+
+  if (poly_int_rtx_p (len, &len_val)
+      && known_eq (len_val, GET_MODE_NUNITS (mode)))  /* Whole vector.  */
+    emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops);
+  else
+    {  /* LEN stays as-is if it satisfies K, else goes in a Pmode reg.  */
+      len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+      emit_nonvlmax_insn (icode, BINARY_OP_TAMA, emit_ops, len);
+    }
+}
+
+/* Expand MASK_LEN_STRIDED_STORE for MODE; OPS are the pattern's operands.  */
+void
+expand_strided_store (machine_mode mode, rtx *ops)
+{
+  rtx v_reg = ops[2];  /* Vector register holding the data to store.  */
+  rtx base = ops[0];   /* Address used for the MEM operand.  */
+  rtx stride = ops[1];
+  rtx mask = ops[3];
+  rtx len = ops[4];    /* ops[5] is not read.  */
+  poly_int64 len_val;
+  rtx vl_type;
+
+  if (poly_int_rtx_p (len, &len_val)
+      && known_eq (len_val, GET_MODE_NUNITS (mode)))  /* Whole vector.  */
+    {
+      len = gen_reg_rtx (Pmode);  /* New pseudo passed to the vsetvl.  */
+      emit_vlmax_vsetvl (mode, len);
+      vl_type = get_avl_type_rtx (VLMAX);
+    }
+  else
+    {  /* LEN stays as-is if it satisfies K, else goes in a Pmode reg.  */
+      len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+      vl_type = get_avl_type_rtx (NONVLMAX);
+    }
+
+  emit_insn (gen_pred_strided_store (mode, gen_rtx_MEM (mode, base),
+                                    mask, stride, v_reg, len, vl_type));
+}
 
 /* Return true if the operation is the floating-point operation need FRM.  */
 static bool
-- 
2.43.0

Reply via email to