From: Pan Li <pan2...@intel.com> This patch combines vec_duplicate + vmadd.vv into vmadd.vx, as shown in the example code below. The related pattern depends on the cost of vec_duplicate from GR2VR: late-combine will perform the combination if the GR2VR cost is zero, and reject it if the GR2VR cost is greater than zero.
Assume the example code below, with a GR2VR cost of 0. Before this patch: 11 │ beq a3,zero,.L8 12 │ vsetvli a5,zero,e32,m1,ta,ma 13 │ vmv.v.x v2,a2 ... 16 │ .L3: 17 │ vsetvli a5,a3,e32,m1,ta,ma ... 22 │ vmadd.vv v1,v2,v3 ... 25 │ bne a3,zero,.L3 After this patch: 11 │ beq a3,zero,.L8 ... 14 │ .L3: 15 │ vsetvli a5,a3,e32,m1,ta,ma ... 20 │ vmadd.vx v1,a2,v3 ... 23 │ bne a3,zero,.L3 gcc/ChangeLog: * config/riscv/autovec-opt.md (*vmacc_vx_<mode>): Rename to handle both the macc and madd. (*mul_then_plus_vx_<mode>): Add madd pattern. * config/riscv/vector.md (@pred_mul_plus_vx_<mode>): Rename to handle both the macc and madd. (*pred_macc_<mode>_scalar_undef): Remove. (*pred_nmsac_<mode>_scalar_undef): Remove. (*pred_mul_then_plus_vx<mode>_undef): Add new pattern to handle both the vmacc and vmadd. (@pred_mul_then_plus_vx<mode>): Ditto. Signed-off-by: Pan Li <pan2...@intel.com> --- gcc/config/riscv/autovec-opt.md | 8 +- gcc/config/riscv/vector.md | 172 ++++++++++++++++---------------- 2 files changed, 92 insertions(+), 88 deletions(-) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 9695fdcb5c9..3caabf0ff4d 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1824,7 +1824,7 @@ (define_insn_and_split "*merge_vx_<mode>" } [(set_attr "type" "vimerge")]) -(define_insn_and_split "*vmacc_vx_<mode>" +(define_insn_and_split "*mul_then_plus_vx_<mode>" [(set (match_operand:V_VLSI 0 "register_operand") (plus:V_VLSI (mult:V_VLSI @@ -1837,9 +1837,9 @@ (define_insn_and_split "*vmacc_vx_<mode>" "&& 1" [(const_int 0)] { - insn_code icode = code_for_pred_mul_plus_vx (<MODE>mode); - rtx ops[] = {operands[0], operands[1], operands[2], operands[3], - RVV_VUNDEF(<MODE>mode)}; + insn_code icode = code_for_pred_mul_then_plus_vx (<MODE>mode); + rtx v_undef = RVV_VUNDEF(<MODE>mode); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], v_undef}; riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, 
ops); DONE; diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 2b35d66b611..d0de08d07c0 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -5493,52 +5493,6 @@ (define_expand "@pred_mul_plus<mode>_scalar" "TARGET_VECTOR" {}) -(define_expand "@pred_mul_plus_vx_<mode>" - [(set (match_operand:V_VLSI_QHS 0 "register_operand") - (if_then_else:V_VLSI_QHS - (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand") - (match_operand 6 "vector_length_operand") - (match_operand 7 "const_int_operand") - (match_operand 8 "const_int_operand") - (match_operand 9 "const_int_operand") - (reg:SI VL_REGNUM) - (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (plus:V_VLSI_QHS - (mult:V_VLSI_QHS - (vec_duplicate:V_VLSI_QHS - (match_operand:<VEL> 2 "register_operand")) - (match_operand:V_VLSI_QHS 3 "register_operand")) - (match_operand:V_VLSI_QHS 4 "register_operand")) - (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))] - "TARGET_VECTOR" -{ - riscv_vector::prepare_ternary_operands (operands); -}) - -(define_expand "@pred_mul_plus_vx_<mode>" - [(set (match_operand:V_VLSI_D 0 "register_operand") - (if_then_else:V_VLSI_D - (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand") - (match_operand 6 "vector_length_operand") - (match_operand 7 "const_int_operand") - (match_operand 8 "const_int_operand") - (match_operand 9 "const_int_operand") - (reg:SI VL_REGNUM) - (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (plus:V_VLSI_D - (mult:V_VLSI_D - (vec_duplicate:V_VLSI_D - (match_operand:<VEL> 2 "register_operand")) - (match_operand:V_VLSI_D 3 "register_operand")) - (match_operand:V_VLSI_D 4 "register_operand")) - (match_operand:V_VLSI_D 5 "vector_merge_operand")))] - "TARGET_VECTOR && TARGET_64BIT" -{ - riscv_vector::prepare_ternary_operands (operands); -}) - (define_expand "@pred_vnmsac_vx_<mode>" [(set (match_operand:V_VLSI_QHS 0 "register_operand") (if_then_else:V_VLSI_QHS @@ -8934,7 +8888,7 @@ (define_insn 
"@pred_indexed_<order>store<V32T:mode><RATIO2I:mode>" [(set_attr "type" "vssegt<order>x") (set_attr "mode" "<V32T:MODE>")]) -(define_insn "*pred_macc_<mode>_scalar_undef" +(define_insn "*pred_nmsac_<mode>_scalar_undef" [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr") (if_then_else:V_VLSI_QHS (unspec:<VM> @@ -8945,21 +8899,21 @@ (define_insn "*pred_macc_<mode>_scalar_undef" (match_operand 9 "const_int_operand" " i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (plus:V_VLSI_QHS + (minus:V_VLSI_QHS + (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0") (mult:V_VLSI_QHS (vec_duplicate:V_VLSI_QHS (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ")) - (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr")) - (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0")) + (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr"))) (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))] "TARGET_VECTOR" "@ - vmacc.vx\t%0,%z3,%4%p1 - vmacc.vx\t%0,%z3,%4%p1" + vnmsac.vx\t%0,%z3,%4%p1 + vnmsac.vx\t%0,%z3,%4%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*pred_macc_<mode>_scalar_undef" +(define_insn "*pred_nmsac_<mode>_scalar_undef" [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr") (if_then_else:V_VLSI_D (unspec:<VM> @@ -8970,70 +8924,120 @@ (define_insn "*pred_macc_<mode>_scalar_undef" (match_operand 9 "const_int_operand" " i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (plus:V_VLSI_D + (minus:V_VLSI_D + (match_operand:V_VLSI_D 5 "register_operand" " 0, 0") (mult:V_VLSI_D (vec_duplicate:V_VLSI_D (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ")) - (match_operand:V_VLSI_D 4 "register_operand" " vr, vr")) - (match_operand:V_VLSI_D 5 "register_operand" " 0, 0")) + (match_operand:V_VLSI_D 4 "register_operand" " vr, vr"))) (match_operand:V_VLSI_D 2 "vector_undef_operand")))] "TARGET_VECTOR && TARGET_64BIT" "@ - vmacc.vx\t%0,%z3,%4%p1 - vmacc.vx\t%0,%z3,%4%p1" + 
vnmsac.vx\t%0,%z3,%4%p1 + vnmsac.vx\t%0,%z3,%4%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*pred_nmsac_<mode>_scalar_undef" - [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr") +(define_insn "*pred_mul_then_plus_vx<mode>_undef" + [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vd, vr, vr") (if_then_else:V_VLSI_QHS (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1") - (match_operand 6 "vector_length_operand" "rvl, rvl") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") - (match_operand 9 "const_int_operand" " i, i") + [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm, Wc1, Wc1") + (match_operand 6 "vector_length_operand" "rvl, rvl, rvl, rvl") + (match_operand 7 "const_int_operand" " i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i") + (match_operand 9 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (minus:V_VLSI_QHS - (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0") + (plus:V_VLSI_QHS (mult:V_VLSI_QHS (vec_duplicate:V_VLSI_QHS - (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ")) - (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr"))) + (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ, rJ, rJ")) + (match_operand:V_VLSI_QHS 4 "register_operand" " 0, vr, 0, vr")) + (match_operand:V_VLSI_QHS 5 "register_operand" " vr, 0, vr, 0")) (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))] "TARGET_VECTOR" "@ - vnmsac.vx\t%0,%z3,%4%p1 - vnmsac.vx\t%0,%z3,%4%p1" + vmadd.vx\t%0,%z3,%5%p1 + vmacc.vx\t%0,%z3,%4%p1 + vmadd.vx\t%0,%z3,%5%p1 + vmacc.vx\t%0,%z3,%4%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*pred_nmsac_<mode>_scalar_undef" - [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr") +(define_insn "*pred_mul_then_plus_vx<mode>_undef" + [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vd, vr, vr") 
(if_then_else:V_VLSI_D (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1") - (match_operand 6 "vector_length_operand" "rvl, rvl") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") - (match_operand 9 "const_int_operand" " i, i") + [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm, Wc1, Wc1") + (match_operand 6 "vector_length_operand" "rvl, rvl, rvl, rvl") + (match_operand 7 "const_int_operand" " i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i") + (match_operand 9 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (minus:V_VLSI_D - (match_operand:V_VLSI_D 5 "register_operand" " 0, 0") + (plus:V_VLSI_D (mult:V_VLSI_D (vec_duplicate:V_VLSI_D - (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ")) - (match_operand:V_VLSI_D 4 "register_operand" " vr, vr"))) + (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ, rJ, rJ")) + (match_operand:V_VLSI_D 4 "register_operand" " 0, vr, 0, vr")) + (match_operand:V_VLSI_D 5 "register_operand" " vr, 0, vr, 0")) (match_operand:V_VLSI_D 2 "vector_undef_operand")))] "TARGET_VECTOR && TARGET_64BIT" "@ - vnmsac.vx\t%0,%z3,%4%p1 - vnmsac.vx\t%0,%z3,%4%p1" + vmadd.vx\t%0,%z3,%5%p1 + vmacc.vx\t%0,%z3,%4%p1 + vmadd.vx\t%0,%z3,%5%p1 + vmacc.vx\t%0,%z3,%4%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "<MODE>")]) +(define_expand "@pred_mul_then_plus_vx<mode>" + [(set (match_operand:V_VLSI_QHS 0 "register_operand") + (if_then_else:V_VLSI_QHS + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_QHS + (mult:V_VLSI_QHS + (vec_duplicate:V_VLSI_QHS + (match_operand:<VEL> 2 "reg_or_0_operand")) + (match_operand:V_VLSI_QHS 3 "register_operand")) + 
(match_operand:V_VLSI_QHS 4 "register_operand")) + (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))] + "TARGET_VECTOR" + { + riscv_vector::prepare_ternary_operands (operands); + }) + +(define_expand "@pred_mul_then_plus_vx<mode>" + [(set (match_operand:V_VLSI_D 0 "register_operand") + (if_then_else:V_VLSI_D + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_D + (mult:V_VLSI_D + (vec_duplicate:V_VLSI_D + (match_operand:<VEL> 2 "reg_or_0_operand")) + (match_operand:V_VLSI_D 3 "register_operand")) + (match_operand:V_VLSI_D 4 "register_operand")) + (match_operand:V_VLSI_D 5 "vector_merge_operand")))] + "TARGET_VECTOR && TARGET_64BIT" + { + riscv_vector::prepare_ternary_operands (operands); + }) + (include "autovec.md") (include "autovec-opt.md") (include "sifive-vector.md") -- 2.43.0