https://gcc.gnu.org/g:351fa55c58a036f148d13bca972e687a0bacd113
commit r16-2379-g351fa55c58a036f148d13bca972e687a0bacd113 Author: Andrew Stubbs <a...@baylibre.com> Date: Thu Jun 12 16:58:33 2025 +0000 amdgcn: add DImode offsets for gather/scatter Add new variant of he gather_load and scatter_store instructions that take the offsets in DImode. This is not the natural width for offsets in the instruction set, but we can use them to compute a vector of absolute addresses, which does work. This enables the autovectorizer to use gather/scatter in a number of additional scenarios (one of which shows up in the SPEC HPC lbm benchmark). gcc/ChangeLog: * config/gcn/gcn-valu.md (gather_load<mode><vndi>): New. (scatter_store<mode><vndi>): New. (mask_gather_load<mode><vndi>): New. (mask_scatter_store<mode><vndi>): New. * config/gcn/gcn.cc (gcn_expand_scaled_offsets): Support DImode. Diff: --- gcc/config/gcn/gcn-valu.md | 81 ++++++++++++++++++++++++++++++++++++++++++++++ gcc/config/gcn/gcn.cc | 34 ++++++++++++------- 2 files changed, 103 insertions(+), 12 deletions(-) diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index dfa6b1523bd7..3899117f2719 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -1133,6 +1133,23 @@ DONE; }) +(define_expand "gather_load<mode><vndi>" + [(match_operand:V_MOV 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:<VnDI> 2 "register_operand") + (match_operand 3 "immediate_operand") + (match_operand:SI 4 "gcn_alu_operand")] + "" + { + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], + operands[2], operands[4], + INTVAL (operands[3]), NULL); + + emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx, + const0_rtx, const0_rtx)); + DONE; + }) + ; Allow any address expression (define_expand "gather<mode>_expr<exec>" [(set (match_operand:V_MOV 0 "register_operand") @@ -1259,6 +1276,23 @@ DONE; }) +(define_expand "scatter_store<mode><vndi>" + [(match_operand:DI 0 "register_operand") + (match_operand:<VnDI> 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:SI 3 "gcn_alu_operand") + (match_operand:V_MOV 4 "register_operand")] + "" + { + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], + operands[1], operands[3], + INTVAL (operands[2]), NULL); + + emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4], + const0_rtx, const0_rtx)); + DONE; + }) + ; Allow any address expression (define_expand "scatter<mode>_expr<exec_scatter>" [(set (mem:BLK (scratch)) @@ -4222,6 +4256,32 @@ DONE; }) +(define_expand "mask_gather_load<mode><vndi>" + [(set:V_MOV (match_operand:V_MOV 0 "register_operand") + (unspec:V_MOV + [(match_operand:DI 1 "register_operand") + (match_operand:<VnDI> 2 "register_operand") + (match_operand 3 "immediate_operand") + (match_operand:SI 4 "gcn_alu_operand") + (match_operand:DI 5 "") + (match_operand:V_MOV 6 "maskload_else_operand")] + UNSPEC_GATHER))] + "" + { + rtx exec = force_reg (DImode, operands[5]); + + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], + operands[2], operands[4], + INTVAL (operands[3]), exec); + + emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr, + const0_rtx, const0_rtx, + const0_rtx, + gcn_gen_undef (<MODE>mode), + exec)); + DONE; + }) + (define_expand "mask_scatter_store<mode><vnsi>" [(match_operand:DI 0 "register_operand") (match_operand:<VnSI> 1 "register_operand") @@ -4250,6 +4310,27 @@ DONE; }) +(define_expand "mask_scatter_store<mode><vndi>" + [(match_operand:DI 0 "register_operand") + (match_operand:<VnDI> 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:SI 3 "gcn_alu_operand") + (match_operand:V_MOV 4 "register_operand") + (match_operand:DI 5 "")] + "" + { + rtx exec = force_reg (DImode, operands[5]); + + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], + operands[1], operands[3], + INTVAL (operands[2]), exec); + + emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx, + operands[4], const0_rtx, + const0_rtx, exec)); + DONE; + }) + (define_code_iterator cond_op [plus minus mult]) (define_expand "cond_<expander><mode>" diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 81a8578cf5d1..3b26d5c6a585 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -2307,36 +2307,46 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, Return values. ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses. - ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */ + ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. + 64-bit offsets - return VnDImode vector of absolute addresses. */ rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale, bool unsigned_p, rtx exec) { int vf = GET_MODE_NUNITS (GET_MODE (offsets)); - rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode)); - rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode)); + rtx scaled_offsets = gen_reg_rtx (GET_MODE (offsets)); + rtx abs_addr = gen_reg_rtx (VnMODE (vf, DImode)); + bool use_di = GET_MODE_INNER (GET_MODE (scaled_offsets)) == DImode; if (CONST_INT_P (scale) && INTVAL (scale) > 0 && exact_log2 (INTVAL (scale)) >= 0) - emit_insn (gen_ashlvNsi3 (tmpsi, offsets, - GEN_INT (exact_log2 (INTVAL (scale))), - NULL, exec)); + emit_insn (gen_ashlvNm3 (scaled_offsets, offsets, + GEN_INT (exact_log2 (INTVAL (scale))), + NULL, exec)); else - emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec)); + emit_insn (gen_mulvNm3_dup (scaled_offsets, scale, offsets, NULL, exec)); + /* No instructions support DImode offsets. */ + if (use_di) + { + emit_insn (gen_addvNdi3_dup (abs_addr, base, scaled_offsets, NULL, exec)); + return abs_addr; + } /* "Global" instructions do not support negative register offsets. */ - if (as == ADDR_SPACE_FLAT || !unsigned_p) + else if (as == ADDR_SPACE_FLAT || !unsigned_p) { if (unsigned_p) - emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec)); + emit_insn (gen_addvNdi3_zext_dup2 (abs_addr, scaled_offsets, base, + NULL, exec)); else - emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec)); - return tmpdi; + emit_insn (gen_addvNdi3_sext_dup2 (abs_addr, scaled_offsets, base, + NULL, exec)); + return abs_addr; } else if (as == ADDR_SPACE_GLOBAL) - return tmpsi; + return scaled_offsets; gcc_unreachable (); }