From: Pan Li <pan2...@intel.com> This patch combines vec_duplicate + vadd.vv into vadd.vx, as shown in the example code below. The related pattern depends on the cost of vec_duplicate from GR2VR, as follows:
* The pattern matching will be inactive if the GR2VR cost is zero. * The cost of GR2VR will be added to the total cost of the pattern, and late-combine will then decide whether or not to perform the replacement based on the cost value. Assume we have the example code below, with the GR2VR cost being 2 by default. #define DEF_VX_BINARY(T, OP) \ void \ test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \ { \ for (unsigned i = 0; i < n; i++) \ out[i] = in[i] OP x; \ } DEF_VX_BINARY(int32_t, +) Before this patch: 10 │ test_binary_vx_add: 11 │ beq a3,zero,.L8 12 │ vsetvli a5,zero,e32,m1,ta,ma // eliminated if GR2VR cost non-zero 13 │ vmv.v.x v2,a2 // Ditto. 14 │ slli a3,a3,32 15 │ srli a3,a3,32 16 │ .L3: 17 │ vsetvli a5,a3,e32,m1,ta,ma 18 │ vle32.v v1,0(a1) 19 │ slli a4,a5,2 20 │ sub a3,a3,a5 21 │ add a1,a1,a4 22 │ vadd.vv v1,v2,v1 23 │ vse32.v v1,0(a0) 24 │ add a0,a0,a4 25 │ bne a3,zero,.L3 After this patch: 10 │ test_binary_vx_add: 11 │ beq a3,zero,.L8 12 │ slli a3,a3,32 13 │ srli a3,a3,32 14 │ .L3: 15 │ vsetvli a5,a3,e32,m1,ta,ma 16 │ vle32.v v1,0(a1) 17 │ slli a4,a5,2 18 │ sub a3,a3,a5 19 │ add a1,a1,a4 20 │ vadd.vx v1,v1,a2 21 │ vse32.v v1,0(a0) 22 │ add a0,a0,a4 23 │ bne a3,zero,.L3 The following test suites passed for this patch. * The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/autovec-opt.md (*<optab>_vx_<mode>): Add new combine to convert vec_duplicate + vadd.vv to vadd.vx on GR2VR cost. * config/riscv/riscv.cc (riscv_rtx_costs): Extract vector cost into a separate func. (riscv_vector_rtx_costs): Add new func to take care of the cost of vector rtx, default to 1 and append GR2VR cost to vec_duplicate rtx. * config/riscv/vector-iterators.md: Add new iterator for vx. 
Signed-off-by: Pan Li <pan2...@intel.com> --- gcc/config/riscv/autovec-opt.md | 22 ++++++++++++++++++++++ gcc/config/riscv/riscv.cc | 26 ++++++++++++++++++++------ gcc/config/riscv/vector-iterators.md | 4 ++++ 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 0c3b0cc7e05..1bc3985f1a3 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1673,3 +1673,25 @@ (define_insn_and_split "*vandn_<mode>" DONE; } [(set_attr "type" "vandn")]) + +;; ============================================================================= +;; Combine vec_duplicate + op.vv to op.vx +;; Include +;; - vadd.vx +;; ============================================================================= +(define_insn_and_split "*<optab>_vx_<mode>" + [(set (match_operand:V_VLSI 0 "register_operand") + (any_int_binop_no_shift_vx:V_VLSI + (vec_duplicate:V_VLSI + (match_operand:<VEL> 1 "register_operand")) + (match_operand:V_VLSI 2 "<binop_rhs2_predicate>")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx ops[] = {operands[0], operands[2], operands[1]}; + riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode), + riscv_vector::BINARY_OP, ops); + } + [(set_attr "type" "vialu")]) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index d3656a7a430..31e9b06568a 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3856,16 +3856,30 @@ riscv_extend_cost (rtx op, bool unsigned_p) #define SINGLE_SHIFT_COST 1 static bool -riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED, - int *total, bool speed) +riscv_vector_rtx_costs (rtx x, machine_mode mode, int *total) { + gcc_assert (riscv_v_ext_mode_p (mode)); + /* TODO: We set RVV instruction cost as 1 by default. Cost Model need to be well analyzed and supported in the future. 
*/ + int cost_val = 1; + enum rtx_code rcode = GET_CODE (x); + + /* Aka (vec_duplicate:RVVM1DI (reg/v:DI 143 [ x ])) */ + if (rcode == VEC_DUPLICATE && SCALAR_INT_MODE_P (GET_MODE (XEXP (x, 0)))) + cost_val += get_vector_costs ()->regmove->GR2VR; + + *total = COSTS_N_INSNS (cost_val); + + return true; +} + +static bool +riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED, + int *total, bool speed) +{ if (riscv_v_ext_mode_p (mode)) - { - *total = COSTS_N_INSNS (1); - return true; - } + return riscv_vector_rtx_costs (x, mode, total); bool float_mode_p = FLOAT_MODE_P (mode); int cost; diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index f8da71b1d65..b5fc833f1d5 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -4052,6 +4052,10 @@ (define_code_iterator any_int_binop_no_shift [plus minus and ior xor smax umax smin umin mult div udiv mod umod ]) +(define_code_iterator any_int_binop_no_shift_vx + [plus +]) + (define_code_iterator any_sat_int_binop [ss_plus ss_minus us_plus us_minus]) (define_code_iterator sat_int_plus_binop [ss_plus us_plus]) (define_code_iterator sat_int_minus_binop [ss_minus us_minus]) -- 2.43.0