This patch adds a new constraint ('wD') that matches the accumulator registers that overlap with VSX registers 0..31 on power10. Future patches will add support for a separate accumulator register class that will be used when support for dense math registers is added.
2024-12-04 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/constraints.md (wD): New constraint. * config/rs6000/mma.md (mma_<acc>): Prepare for alternate accumulator registers. Use wD constraint instead of 'd' constraint. Use accumulator_operand instead of fpr_reg_operand. (mma_<vv>): Likewise. (mma_<avv>): Likewise. (mma_<pv>): Likewise. (mma_<apv>): Likewise. (mma_<vvi4i4i8>): Likewise. (mma_<avvi4i4i8>): Likewise. (mma_<vvi4i4i2>): Likewise. (mma_<avvi4i4i2>): Likewise. (mma_<vvi4i4>): Likewise. (mma_<avvi4i4>): Likewise. (mma_<pvi4i2>): Likewise. (mma_<apvi4i2>): Likewise. (mma_<vvi4i4i4>): Likewise. (mma_<avvi4i4i4>): Likewise. * config/rs6000/predicates.md (accumulator_operand): New predicate. * config/rs6000/rs6000.cc (rs6000_debug_reg_global): Print the register class for the 'wD' constraint. (rs6000_init_hard_regno_mode_ok): Set up the 'wD' register constraint class. * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add element for the 'wD' constraint. * doc/md.texi (PowerPC constraints): Document the 'wD' constraint. --- gcc/config/rs6000/constraints.md | 3 +++ gcc/config/rs6000/mma.md | 46 ++++++++++++++++---------------- gcc/config/rs6000/predicates.md | 15 +++++++++++ gcc/config/rs6000/rs6000.cc | 7 ++++- gcc/config/rs6000/rs6000.h | 1 + gcc/doc/md.texi | 5 ++++ 6 files changed, 53 insertions(+), 24 deletions(-) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 369a7b75042..277a30a8245 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -107,6 +107,9 @@ (define_constraint "wB" (match_test "TARGET_P8_VECTOR") (match_operand 0 "s5bit_cint_operand"))) +(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]" + "Accumulator register.") + (define_constraint "wE" "@internal Vector constant that can be loaded with the XXSPLTIB instruction." 
(match_test "xxspltib_constant_nosplit (op, mode)")) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 04e2d0066df..e051239df57 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -504,8 +504,8 @@ (define_insn_and_split "*mma_disassemble_acc" ;; the accumulator. We enforce this by marking the output as early clobber. (define_insn "mma_<acc>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")] + [(set (match_operand:XO 0 "accumulator_operand" "=&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")] MMA_ACC))] "TARGET_MMA" "<acc> %A0" @@ -523,7 +523,7 @@ (define_insn "mma_xxsetaccz" [(set_attr "type" "mma")]) (define_insn "mma_<vv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] @@ -532,8 +532,8 @@ (define_insn "mma_<vv>" [(set_attr "type" "mma")]) (define_insn "mma_<avv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] @@ -542,7 +542,7 @@ (define_insn "mma_<avv>" [(set_attr "type" "mma")]) (define_insn "mma_<pv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] @@ -551,8 +551,8 @@ (define_insn "mma_<pv>" [(set_attr "type" "mma")]) (define_insn "mma_<apv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 
"fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_APV))] @@ -561,7 +561,7 @@ (define_insn "mma_<apv>" [(set_attr "type" "mma")]) (define_insn "mma_<vvi4i4i8>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -574,8 +574,8 @@ (define_insn "mma_<vvi4i4i8>" (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4i8>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -588,7 +588,7 @@ (define_insn "mma_<avvi4i4i8>" (set_attr "prefixed" "yes")]) (define_insn "mma_<vvi4i4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -601,8 +601,8 @@ (define_insn "mma_<vvi4i4i2>" (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") 
(match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -615,7 +615,7 @@ (define_insn "mma_<avvi4i4i2>" (set_attr "prefixed" "yes")]) (define_insn "mma_<vvi4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -627,8 +627,8 @@ (define_insn "mma_<vvi4i4>" (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -640,7 +640,7 @@ (define_insn "mma_<avvi4i4>" (set_attr "prefixed" "yes")]) (define_insn "mma_<pvi4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -652,8 +652,8 @@ (define_insn "mma_<pvi4i2>" (set_attr "prefixed" "yes")]) (define_insn "mma_<apvi4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -665,7 +665,7 @@ (define_insn "mma_<apvi4i2>" (set_attr "prefixed" "yes")]) 
(define_insn "mma_<vvi4i4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -678,8 +678,8 @@ (define_insn "mma_<vvi4i4i4>" (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 0b78901e94b..1827647b7c1 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -186,6 +186,21 @@ (define_predicate "vlogical_operand" return VLOGICAL_REGNO_P (REGNO (op)); }) +;; Return 1 if op is an accumulator. On power10 systems, the accumulators +;; overlap with the FPRs. +(define_predicate "accumulator_operand" + (match_operand 0 "register_operand") +{ + if (!REG_P (op)) + return 0; + + if (!HARD_REGISTER_P (op)) + return 1; + + int r = REGNO (op); + return FP_REGNO_P (r) && (r & 3) == 0; +}) + ;; Return 1 if op is the carry register. 
(define_predicate "ca_operand" (match_operand 0 "register_operand") diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 0878929de22..3047a9e9a9b 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -2412,6 +2412,7 @@ rs6000_debug_reg_global (void) "wr reg_class = %s\n" "wx reg_class = %s\n" "wA reg_class = %s\n" + "wD reg_class = %s\n" "\n", reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]], @@ -2419,7 +2420,8 @@ rs6000_debug_reg_global (void) reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], - reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]); + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]); nl = "\n"; for (m = 0; m < NUM_MACHINE_MODES; ++m) @@ -3082,6 +3084,9 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_DIRECT_MOVE_128) rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; + if (TARGET_MMA) + rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS; + /* Set up the reload helper and direct move functions. */ if (TARGET_VSX || TARGET_ALTIVEC) { diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 392ca858fc4..69519851326 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1197,6 +1197,7 @@ enum r6000_reg_class_enum { RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */ RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */ RS6000_CONSTRAINT_wA, /* BASE_REGS if 64-bit. */ + RS6000_CONSTRAINT_wD, /* Accumulator regs if MMA/Dense Math. */ RS6000_CONSTRAINT_MAX }; diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 69605bf75c0..5ceccc9b97f 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3379,6 +3379,11 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}. 
@item wA Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}. +@item wD +Accumulator register if @option{-mma} is used; otherwise, +@code{NO_REGS}. For @option{-mcpu=power10} the accumulator registers +overlap with VSX vector registers 0..31. + @item wB Signed 5-bit constant integer that can be loaded into an Altivec register. -- 2.47.0 -- Michael Meissner, IBM PO Box 98, Ayer, Massachusetts, USA, 01432 email: meiss...@linux.ibm.com