https://gcc.gnu.org/g:67fa78e3953c481018d73fb7258d938a9c67eb4e
commit 67fa78e3953c481018d73fb7258d938a9c67eb4e Author: Michael Meissner <meiss...@linux.ibm.com> Date: Mon Dec 2 15:34:47 2024 -0500 RFC2653-Add wD constraint. This patch adds a new constraint ('wD') that matches the accumulator registers that overlap with VSX registers 0..31 on power10. Future patches will add the support for a separate accumulator register class that will be used when the support for dense math registes is added. 2024-12-02 Michael Meissner <meiss...@linux.ibm.com> * config/rs6000/constraints.md (wD): New constraint. * config/rs6000/mma.md (mma_<acc>): Prepare for alternate accumulator registers. Use wD constraint instead of 'd' constraint. Use accumulator_operand instead of fpr_reg_operand. (mma_<vv>): Likewise. (mma_<avv>): Likewise. (mma_<pv>): Likewise. (mma_<apv>): Likewise. (mma_<vvi4i4i8>): Likewise. (mma_<avvi4i4i8>): Likewise. (mma_<vvi4i4i2>): Likewise. (mma_<avvi4i4i2>): Likewise. (mma_<vvi4i4>): Likewise. (mma_<avvi4i4>): Likewise. (mma_<pvi4i2): Likewise. (mma_<apvi4i2>): Likewise. (mma_<vvi4i4i4>): Likewise. (mma_<avvi4i4i4): Likewise. * config/rs6000/predicates.md (accumulator_operand): New predicate. * config/rs6000/rs6000.cc (rs6000_debug_reg_global): Print the register class for the 'wD' constraint. (rs6000_init_hard_regno_mode_ok): Set up the 'wD' register constraint class. * config/rs6000/rs6000.h (enum r6000_reg_class_enum): Add element for the 'wD' constraint. * doc/md.texi (PowerPC constraints): Document the 'wD' constraint. Diff: --- gcc/config/rs6000/constraints.md | 3 +++ gcc/config/rs6000/mma.md | 46 ++++++++++++++++++++-------------------- gcc/config/rs6000/predicates.md | 15 +++++++++++++ gcc/config/rs6000/rs6000.cc | 7 +++++- gcc/config/rs6000/rs6000.h | 1 + gcc/doc/md.texi | 5 +++++ 6 files changed, 53 insertions(+), 24 deletions(-) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 369a7b75042d..277a30a82458 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -107,6 +107,9 @@ (match_test "TARGET_P8_VECTOR") (match_operand 0 "s5bit_cint_operand"))) +(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]" + "Accumulator register.") + (define_constraint "wE" "@internal Vector constant that can be loaded with the XXSPLTIB instruction." (match_test "xxspltib_constant_nosplit (op, mode)")) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 04e2d0066df2..e051239df57b 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -504,8 +504,8 @@ ;; the accumulator. We enforce this by marking the output as early clobber. (define_insn "mma_<acc>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")] + [(set (match_operand:XO 0 "accumulator_operand" "=&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")] MMA_ACC))] "TARGET_MMA" "<acc> %A0" @@ -523,7 +523,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_<vv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] @@ -532,8 +532,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_<avv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] @@ -542,7 +542,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_<pv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] @@ -551,8 +551,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_<apv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_APV))] @@ -561,7 +561,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_<vvi4i4i8>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -574,8 +574,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4i8>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -588,7 +588,7 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<vvi4i4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -601,8 +601,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -615,7 +615,7 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<vvi4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -627,8 +627,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -640,7 +640,7 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<pvi4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -652,8 +652,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<apvi4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -665,7 +665,7 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<vvi4i4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -678,8 +678,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 0b78901e94be..1827647b7c1e 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -186,6 +186,21 @@ return VLOGICAL_REGNO_P (REGNO (op)); }) +;; Return 1 if op is an accumulator. On power10 systems, the accumulators +;; overlap with the FPRs. +(define_predicate "accumulator_operand" + (match_operand 0 "register_operand") +{ + if (!REG_P (op)) + return 0; + + if (!HARD_REGISTER_P (op)) + return 1; + + int r = REGNO (op); + return FP_REGNO_P (r) && (r & 3) == 0; +}) + ;; Return 1 if op is the carry register. (define_predicate "ca_operand" (match_operand 0 "register_operand") diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 0878929de224..3047a9e9a9b0 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -2412,6 +2412,7 @@ rs6000_debug_reg_global (void) "wr reg_class = %s\n" "wx reg_class = %s\n" "wA reg_class = %s\n" + "wD reg_class = %s\n" "\n", reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]], @@ -2419,7 +2420,8 @@ rs6000_debug_reg_global (void) reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], - reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]); + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]); nl = "\n"; for (m = 0; m < NUM_MACHINE_MODES; ++m) @@ -3082,6 +3084,9 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_DIRECT_MOVE_128) rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; + if (TARGET_MMA) + rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS; + /* Set up the reload helper and direct move functions. */ if (TARGET_VSX || TARGET_ALTIVEC) { diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 392ca858fc40..69519851326e 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1197,6 +1197,7 @@ enum r6000_reg_class_enum { RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */ RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */ RS6000_CONSTRAINT_wA, /* BASE_REGS if 64-bit. */ + RS6000_CONSTRAINT_wD, /* Accumulator regs if MMA/Dense Math. */ RS6000_CONSTRAINT_MAX }; diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 69605bf75c0f..5ceccc9b97f3 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3379,6 +3379,11 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}. @item wA Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}. +@item wD +Accumulator register if @option{-mma} is used; otherwise, +@code{NO_REGS}. For @option{-mcpu=power10} the accumulator registers +overlap with VSX vector registers 0..31. + @item wB Signed 5-bit constant integer that can be loaded into an Altivec register.