https://gcc.gnu.org/g:bdc4062a0796788e44d5e6ecd753268a8b453cc7
commit r16-2377-gbdc4062a0796788e44d5e6ecd753268a8b453cc7 Author: Andrew Stubbs <a...@baylibre.com> Date: Thu Jun 12 16:57:23 2025 +0000 amdgcn: add more insn patterns using vec_duplicate These new insns allow more efficient use of scalar inputs to 64-bit vector add and mul. Also, the patch adjusts the existing mul.._dup because it was actually a dup2 (the vec_duplicate is on the second input), and that was inconveniently inconsistent. The patterns are generally useful, but will be used directly by a follow-up patch. gcc/ChangeLog: * config/gcn/gcn-valu.md (add<mode>3_dup): New. (add<mode>3_dup_exec): New. (<su>mul<mode>3_highpart_dup<exec>): New. (mul<mode>3_dup): Move the vec_duplicate to operand 1. (mul<mode>3_dup_exec): New. (vec_series<mode>): Adjust call to gen_mul<mode>3_dup. * config/gcn/gcn.cc (gcn_expand_vector_init): Likewise. Diff: --- gcc/config/gcn/gcn-valu.md | 181 ++++++++++++++++++++++++++++++++++++++++++++- gcc/config/gcn/gcn.cc | 4 +- 2 files changed, 179 insertions(+), 6 deletions(-) diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 7c4dde1cfce4..dfa6b1523bd7 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -1645,6 +1645,39 @@ [(set_attr "type" "vmult") (set_attr "length" "8")]) +(define_insn_and_split "add<mode>3_dup" + [(set (match_operand:V_DI 0 "register_operand" "= v") + (plus:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "register_operand" "SvB")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDb"))) + (clobber (reg:DI VCC_REG)) + (clobber (match_scratch:<VnSI> 3 "=&v"))] + "" + "#" + "gcn_can_split_p (<MODE>mode, operands[0]) + && gcn_can_split_p (<MODE>mode, operands[1]) + && gcn_can_split_p (<MODE>mode, operands[2])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_add<vnsi>3_vcc_dup + (gcn_operand_part (<MODE>mode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (<MODE>mode, operands[2], 0), + vcc)); + emit_insn (gen_vec_duplicate<vnsi> (operands[3], + gcn_operand_part (DImode, operands[1], 1))); + emit_insn (gen_addc<vnsi>3 + (gcn_operand_part (<MODE>mode, operands[0], 1), + operands[3], + gcn_operand_part (<MODE>mode, operands[2], 1), + vcc, vcc)); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + (define_insn_and_split "add<mode>3_exec" [(set (match_operand:V_DI 0 "register_operand" "= v") (vec_merge:V_DI @@ -1682,6 +1715,49 @@ [(set_attr "type" "vmult") (set_attr "length" "8")]) +(define_insn_and_split "add<mode>3_dup_exec" + [(set (match_operand:V_DI 0 "register_operand" "= v") + (vec_merge:V_DI + (plus:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "register_operand" "SvB")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDb")) + (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) + (clobber (reg:DI VCC_REG)) + (clobber (match_scratch:<VnSI> 5 "=&v"))] + "" + "#" + "gcn_can_split_p (<MODE>mode, operands[0]) + && gcn_can_split_p (<MODE>mode, operands[1]) + && gcn_can_split_p (<MODE>mode, operands[2]) + && gcn_can_split_p (<MODE>mode, operands[4])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_add<vnsi>3_vcc_dup_exec + (gcn_operand_part (<MODE>mode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (<MODE>mode, operands[2], 0), + vcc, + gcn_operand_part (<MODE>mode, operands[3], 0), + operands[4])); + emit_insn (gen_vec_duplicate<vnsi>_exec (operands[5], + gcn_operand_part (DImode, operands[1], 1), + gcn_gen_undef (<VnSI>mode), + operands[4])); + emit_insn (gen_addc<vnsi>3_exec + (gcn_operand_part (<MODE>mode, operands[0], 1), + operands[5], + gcn_operand_part (<MODE>mode, operands[2], 1), + vcc, vcc, + gcn_operand_part (<MODE>mode, operands[3], 1), + operands[4])); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + (define_insn_and_split "sub<mode>3" [(set (match_operand:V_DI 0 "register_operand" "= v, v") (minus:V_DI @@ -2187,6 +2263,22 @@ [(set_attr "type" "vop3a") (set_attr "length" "8")]) +(define_insn "<su>mul<mode>3_highpart_dup<exec>" + [(set (match_operand:V_SI 0 "register_operand" "= v") + (truncate:V_SI + (lshiftrt:<VnDI> + (mult:<VnDI> + (any_extend:<VnDI> + (vec_duplicate:V_SI + (match_operand:SI 1 "gcn_alu_operand" "SvA"))) + (any_extend:<VnDI> + (match_operand:V_SI 2 "gcn_alu_operand" " vA"))) + (const_int 32))))] + "" + "v_mul_hi<sgnsuffix>0\t%0, %2, %1" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + (define_insn "mul<mode>3<exec>" [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") (mult:V_INT_1REG @@ -2198,11 +2290,11 @@ (set_attr "length" "8")]) (define_insn "mul<mode>3_dup<exec>" - [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") + [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") (mult:V_INT_1REG - (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA") (vec_duplicate:V_INT_1REG - (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))] + (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvA")) + (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vA")))] "" "v_mul_lo_u32\t%0, %1, %2" [(set_attr "type" "vop3a") @@ -2238,6 +2330,37 @@ DONE; }) +(define_insn_and_split "mul<mode>3_dup" + [(set (match_operand:V_DI 0 "register_operand" "=&v") + (mult:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "gcn_alu_operand" " Sv")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))) + (clobber (match_scratch:<VnSI> 3 "=&v"))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); + rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); + rtx left_lo = gcn_operand_part (DImode, operands[1], 0); + rtx left_hi = gcn_operand_part (DImode, operands[1], 1); + rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); + rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); + rtx tmp = operands[3]; + + emit_insn (gen_mul<vnsi>3_dup (out_lo, left_lo, right_lo)); + emit_insn (gen_umul<vnsi>3_highpart_dup (out_hi, left_lo, right_lo)); + emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_lo)); + emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); + emit_insn (gen_mul<vnsi>3_dup (tmp, left_lo, right_hi)); + emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); + emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_hi)); + emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); + DONE; + }) + (define_insn_and_split "mul<mode>3_exec" [(set (match_operand:V_DI 0 "register_operand" "=&v") (vec_merge:V_DI @@ -2286,6 +2409,56 @@ DONE; }) +(define_insn_and_split "mul<mode>3_dup_exec" + [(set (match_operand:V_DI 0 "register_operand" "=&v") + (vec_merge:V_DI + (mult:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "gcn_alu_operand" " Sv")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDA")) + (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) + (clobber (match_scratch:<VnSI> 5 "=&v"))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); + rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); + rtx left_lo = gcn_operand_part (DImode, operands[1], 0); + rtx left_hi = gcn_operand_part (DImode, operands[1], 1); + rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); + rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); + rtx exec = operands[4]; + rtx tmp = operands[5]; + + rtx old_lo, old_hi; + if (GET_CODE (operands[3]) == UNSPEC) + { + old_lo = old_hi = gcn_gen_undef (<VnSI>mode); + } + else + { + old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); + old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); + } + + rtx undef = gcn_gen_undef (<VnSI>mode); + + emit_insn (gen_mul<vnsi>3_dup_exec (out_lo, left_lo, right_lo, old_lo, + exec)); + emit_insn (gen_umul<vnsi>3_highpart_dup_exec (out_hi, left_lo, right_lo, + old_hi, exec)); + emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_lo, undef, exec)); + emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); + emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_lo, right_hi, undef, exec)); + emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); + emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_hi, undef, exec)); + emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); + DONE; + }) + (define_insn_and_split "mul<mode>3_zext" [(set (match_operand:V_DI 0 "register_operand" "=&v") (mult:V_DI @@ -4397,7 +4570,7 @@ rtx tmp = gen_reg_rtx (<MODE>mode); rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1)); - emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2])); + emit_insn (gen_mul<mode>3_dup (tmp, operands[2], v1)); emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1])); DONE; }) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 0ce5a29fbb57..56c832a483e9 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -1995,8 +1995,8 @@ gcn_expand_vector_init (rtx op0, rtx vec) rtx addr = gen_reg_rtx (addrmode); int unit_size = GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op0))); - emit_insn (gen_mulvNsi3_dup (ramp, gen_rtx_REG (offsetmode, VGPR_REGNO (1)), - GEN_INT (unit_size))); + emit_insn (gen_mulvNsi3_dup (ramp, GEN_INT (unit_size), + gen_rtx_REG (offsetmode, VGPR_REGNO (1)))); bool simple_repeat = true;