From: Pan Li <pan2...@intel.com> This patch implements MASK_LEN_STRIDED_LOAD{STORE} in the RISC-V backend by leveraging the vector strided load/store insns.
For example: void foo (int * __restrict a, int * __restrict b, int stride, int n) { for (int i = 0; i < n; i++) a[i*stride] = b[i*stride] + 100; } Before this patch: 38 │ vsetvli a5,a3,e32,m1,ta,ma 39 │ vluxei64.v v1,(a1),v4 40 │ mul a4,a2,a5 41 │ sub a3,a3,a5 42 │ vadd.vv v1,v1,v2 43 │ vsuxei64.v v1,(a0),v4 44 │ add a1,a1,a4 45 │ add a0,a0,a4 After this patch: 33 │ vsetvli a5,a3,e32,m1,ta,ma 34 │ vlse32.v v1,0(a1),a2 35 │ mul a4,a2,a5 36 │ sub a3,a3,a5 37 │ vadd.vv v1,v1,v2 38 │ vsse32.v v1,0(a0),a2 39 │ add a1,a1,a4 40 │ add a0,a0,a4 The following test suites passed for this patch: * The riscv full regression test. gcc/ChangeLog: * config/riscv/autovec.md (mask_len_strided_load_<mode>): Add new pattern for MASK_LEN_STRIDED_LOAD. (mask_len_strided_store_<mode>): Ditto but for store. * config/riscv/riscv-protos.h (expand_strided_load): Add new func decl to expand strided load. (expand_strided_store): Ditto but for store. * config/riscv/riscv-v.cc (expand_strided_load): Add new func impl to expand strided load. (expand_strided_store): Ditto but for store. 
Signed-off-by: Pan Li <pan2...@intel.com> Co-Authored-By: Juzhe-Zhong <juzhe.zh...@rivai.ai> --- gcc/config/riscv/autovec.md | 29 ++++++++++++++++++ gcc/config/riscv/riscv-protos.h | 2 ++ gcc/config/riscv/riscv-v.cc | 52 +++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index a34f63c9651..85a915bd65f 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2855,3 +2855,32 @@ (define_expand "v<bitmanip_optab><mode>3" DONE; } ) + +;; ========================================================================= +;; == Strided Load/Store +;; ========================================================================= +(define_expand "mask_len_strided_load_<mode>" + [(match_operand:V 0 "register_operand") + (match_operand 1 "pmode_reg_or_0_operand") + (match_operand 2 "pmode_reg_or_0_operand") + (match_operand:<VM> 3 "vector_mask_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_strided_load (<MODE>mode, operands); + DONE; + }) + +(define_expand "mask_len_strided_store_<mode>" + [(match_operand 0 "pmode_reg_or_0_operand") + (match_operand 1 "pmode_reg_or_0_operand") + (match_operand:V 2 "register_operand") + (match_operand:<VM> 3 "vector_mask_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand")] + "TARGET_VECTOR" + { + riscv_vector::expand_strided_store(<MODE>mode, operands); + DONE; + }) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index d690162bb0c..47c9494ff2b 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -696,6 +696,8 @@ bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool); void emit_vec_extract (rtx, rtx, rtx); bool expand_vec_setmem (rtx, rtx, rtx); bool expand_vec_cmpmem (rtx, rtx, rtx, rtx); +void expand_strided_load (machine_mode, rtx *); 
+void expand_strided_store (machine_mode, rtx *); /* Rounding mode bitfield for fixed point VXRM. */ enum fixed_point_rounding_mode diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 630fbd80e94..ae028e8928a 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3833,6 +3833,58 @@ expand_load_store (rtx *ops, bool is_load) } } +/* Expand MASK_LEN_STRIDED_LOAD. OPS[0] is the destination vector register, OPS[1] the base address, OPS[2] the stride, OPS[3] the mask and OPS[4] the length; OPS[5] is not read here. When the length is a constant equal to GET_MODE_NUNITS (MODE) the insn is emitted via emit_vlmax_insn; otherwise it is emitted via emit_nonvlmax_insn with the length forced into a Pmode register unless it already satisfies constraint K. */ +void +expand_strided_load (machine_mode mode, rtx *ops) +{ + rtx v_reg = ops[0]; + rtx base = ops[1]; + rtx stride = ops[2]; + rtx mask = ops[3]; + rtx len = ops[4]; + poly_int64 len_val; + + insn_code icode = code_for_pred_strided_load (mode); + rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride}; + + if (poly_int_rtx_p (len, &len_val) + && known_eq (len_val, GET_MODE_NUNITS (mode))) + emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops); + else + { + len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len); + emit_nonvlmax_insn (icode, BINARY_OP_TAMA, emit_ops, len); + } +} + +/* Expand MASK_LEN_STRIDED_STORE. OPS[0] is the base address, OPS[1] the stride, OPS[2] the vector register to store, OPS[3] the mask and OPS[4] the length; OPS[5] is not read here. When the length is a constant equal to GET_MODE_NUNITS (MODE) a new Pmode register is created and passed to emit_vlmax_vsetvl, and the store is emitted with the VLMAX avl type; otherwise the length is forced into a Pmode register unless it already satisfies constraint K, and the NONVLMAX avl type is used. */ +void +expand_strided_store (machine_mode mode, rtx *ops) +{ + rtx v_reg = ops[2]; + rtx base = ops[0]; + rtx stride = ops[1]; + rtx mask = ops[3]; + rtx len = ops[4]; + poly_int64 len_val; + rtx vl_type; + + if (poly_int_rtx_p (len, &len_val) + && known_eq (len_val, GET_MODE_NUNITS (mode))) + { + len = gen_reg_rtx (Pmode); + emit_vlmax_vsetvl (mode, len); + vl_type = get_avl_type_rtx (VLMAX); + } + else + { + len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len); + vl_type = get_avl_type_rtx (NONVLMAX); + } + + emit_insn (gen_pred_strided_store (mode, gen_rtx_MEM (mode, base), + mask, stride, v_reg, len, vl_type)); +} /* Return true if the operation is the floating-point operation need FRM. */ static bool -- 2.43.0