[llvm-branch-commits] [llvm] AMDGPU: Handle rewriting non-tied MFMA to AGPR form (PR #149027)

via llvm-branch-commits Wed, 16 Jul 2025 00:03:10 -0700

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

If src2 and dst aren't the same register, to fold a copy
to AGPR into the instruction we also need to reassign src2
to an available AGPR. All the other uses of src2 also need
to be compatible with the AGPR replacement in order to avoid
inserting other copies somewhere else.

Perform this transform after verifying that all other uses are
compatible with AGPR and that an AGPR is available at all points
(which effectively means rewriting a full chain of MFMAs and
loads/stores at once).

---

Patch is 31.90 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/149027.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp (+208-60) 
- (modified) llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir (+17-22)


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index 79088b18cf97b..d258ba5f4736d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -14,12 +14,7 @@
 /// MFMA opcode.
 ///
 /// TODO:
-///  - Handle non-tied dst+src2 cases. We need to try to find a copy from an
-///    AGPR from src2, or reassign src2 to an available AGPR (which should work
-///    in the common case of a load).
-///
-///  - Handle multiple MFMA uses of the same register. e.g. chained MFMAs that
-///    can be rewritten as a set
+///  - Handle SplitKit partial copy bundles, and not just full copy instructions
 ///
 ///  - Update LiveIntervals incrementally instead of recomputing from scratch
 ///
@@ -49,13 +44,18 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
   VirtRegMap &VRM;
   LiveRegMatrix &LRM;
   LiveIntervals &LIS;
+  const RegisterClassInfo &RegClassInfo;
+
+  bool attemptReassignmentsToAGPR(SmallSetVector<Register, 4> &InterferingRegs,
+                                  MCPhysReg PrefPhysReg) const;
 
 public:
   AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
-                                LiveRegMatrix &LRM, LiveIntervals &LIS)
+                                LiveRegMatrix &LRM, LiveIntervals &LIS,
+                                const RegisterClassInfo &RegClassInfo)
       : ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
         TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
-        LIS(LIS) {}
+        LIS(LIS), RegClassInfo(RegClassInfo) {}
 
   bool isRewriteCandidate(const MachineInstr &MI) const {
     return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != 
-1;
@@ -64,10 +64,10 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
   /// Compute the register class constraints based on the uses of \p Reg,
   /// excluding uses from \p ExceptMI. This should be nearly identical to
   /// MachineRegisterInfo::recomputeRegClass.
-  const TargetRegisterClass *
-  recomputeRegClassExceptRewritable(Register Reg,
-                                    const TargetRegisterClass *OldRC,
-                                    const TargetRegisterClass *NewRC) const;
+  const TargetRegisterClass *recomputeRegClassExceptRewritable(
+      Register Reg, const TargetRegisterClass *OldRC,
+      const TargetRegisterClass *NewRC,
+      SmallVectorImpl<MachineInstr *> &RewriteCandidates) const;
 
   bool run(MachineFunction &MF) const;
 };
@@ -75,7 +75,8 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
 const TargetRegisterClass *
 AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
     Register Reg, const TargetRegisterClass *OldRC,
-    const TargetRegisterClass *NewRC) const {
+    const TargetRegisterClass *NewRC,
+    SmallVectorImpl<MachineInstr *> &RewriteCandidates) const {
 
   // Accumulate constraints from all uses.
   for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
@@ -86,8 +87,11 @@ 
AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
     // effects of rewrite candidates. It just so happens that we can use either
     // AGPR or VGPR in src0/src1, so don't bother checking the constraint
     // effects of the individual operands.
-    if (isRewriteCandidate(*MI))
+    if (isRewriteCandidate(*MI)) {
+      if (!is_contained(RewriteCandidates, MI))
+        RewriteCandidates.push_back(MI);
       continue;
+    }
 
     unsigned OpNo = &MO - &MI->getOperand(0);
     NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, &TII, &TRI);
@@ -98,6 +102,58 @@ 
AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
   return NewRC;
 }
 
+/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
+/// preference to use \p PrefPhysReg first. Returns false if the reassignments
+/// cannot be trivially performed.
+bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
+    SmallSetVector<Register, 4> &InterferingRegs, MCPhysReg PrefPhysReg) const {
+  // FIXME: The ordering may matter here, but we're just taking uselistorder
+  // with the special case of ensuring to process the starting instruction
+  // first. We probably should extract the priority advisor out of greedy and
+  // use that ordering.
+  for (Register InterferingReg : InterferingRegs) {
+    LiveInterval &ReassignLI = LIS.getInterval(InterferingReg);
+    const TargetRegisterClass *EquivalentAGPRRegClass =
+        TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
+
+    MCPhysReg Assignable = AMDGPU::NoRegister;
+    if (EquivalentAGPRRegClass->contains(PrefPhysReg) &&
+        LRM.checkInterference(ReassignLI, PrefPhysReg) ==
+            LiveRegMatrix::IK_Free) {
+      // First try to assign to the AGPR we were already copying to. This
+      // should be the first assignment we attempt. We have to guard
+      // against the use being a subregister (which doesn't have an exact
+      // class match).
+
+      // TODO: If this does happen to be a subregister use, we should
+      // still try to assign to a subregister of the original copy result.
+      Assignable = PrefPhysReg;
+    } else {
+      ArrayRef<MCPhysReg> AllocOrder =
+          RegClassInfo.getOrder(EquivalentAGPRRegClass);
+      for (MCPhysReg Reg : AllocOrder) {
+        if (LRM.checkInterference(ReassignLI, Reg) == LiveRegMatrix::IK_Free) {
+          Assignable = Reg;
+          break;
+        }
+      }
+    }
+
+    if (!Assignable) {
+      LLVM_DEBUG(dbgs() << "Unable to reassign VGPR "
+                        << printReg(InterferingReg, &TRI)
+                        << " to a free AGPR\n");
+      return false;
+    }
+
+    LLVM_DEBUG(dbgs() << "Reassigning VGPR " << printReg(InterferingReg, &TRI)
+                      << " to " << printReg(Assignable, &TRI) << '\n');
+    LRM.assign(ReassignLI, Assignable);
+  }
+
+  return true;
+}
+
 bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
   // This only applies on subtargets that have a configurable AGPR vs. VGPR
   // allocation.
@@ -127,7 +183,6 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction 
&MF) const {
 
     LiveInterval &LI = LIS.getInterval(VReg);
 
-    // TODO: Test multiple uses
     for (VNInfo *VNI : LI.vnis()) {
       MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
 
@@ -136,22 +191,21 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction 
&MF) const {
       if (!DefMI || !DefMI->isFullCopy())
         continue;
 
-      Register CopySrcReg = DefMI->getOperand(1).getReg();
-      if (!CopySrcReg.isVirtual())
+      Register MFMADstReg = DefMI->getOperand(1).getReg();
+      if (!MFMADstReg.isVirtual())
         continue;
 
-      LiveInterval &CopySrcLI = LIS.getInterval(CopySrcReg);
+      LiveInterval &CopySrcLI = LIS.getInterval(MFMADstReg);
       LiveQueryResult LRQ = CopySrcLI.Query(VNI->def.getRegSlot());
-      MachineInstr *CopySrcMI = 
LIS.getInstructionFromIndex(LRQ.valueIn()->def);
-      if (!CopySrcMI)
+      MachineInstr *MFMA = LIS.getInstructionFromIndex(LRQ.valueIn()->def);
+      if (!MFMA)
         continue;
 
-      int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(CopySrcMI->getOpcode());
+      int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MFMA->getOpcode());
       if (AGPROp == -1)
         continue;
 
-      MachineOperand *Src2 =
-          TII.getNamedOperand(*CopySrcMI, AMDGPU::OpName::src2);
+      MachineOperand *Src2 = TII.getNamedOperand(*MFMA, AMDGPU::OpName::src2);
 
       // FIXME: getMinimalPhysRegClass returns a nonsense AV_* subclass instead
       // of an AGPR or VGPR subclass, so we can't simply use the result on the
@@ -163,27 +217,23 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction 
&MF) const {
                << " Dst=[" << printReg(VReg) << " => "
                << printReg(PhysReg, &TRI) << "], Src2=["
                << printReg(Src2->getReg(), &TRI) << " => "
-               << printReg(Src2PhysReg, &TRI) << "]: " << *CopySrcMI;
+               << printReg(Src2PhysReg, &TRI) << "]: " << *MFMA;
       });
 
-      // If the inputs are tied and the same register, we can shortcut and
-      // directly replace the register.
-      if (Src2->getReg() != CopySrcReg) {
-        LLVM_DEBUG(
-            dbgs()
-            << "Replacing untied VGPR MFMAs with AGPR form not yet handled\n");
-        // TODO: Only handles the tied case for now. If the input operand is a
-        // different register, we need to also reassign it (either by looking
-        // for a compatible copy-from-AGPR, or by seeing if an available AGPR 
is
-        // compatible with all other uses.
+      const TargetRegisterClass *DstVirtRegRC = 
MRI.getRegClass(Src2->getReg());
+      const TargetRegisterClass *NewDstConstraintRC =
+          TII.getRegClass(TII.get(AGPROp), 0, &TRI, MF);
+      const TargetRegisterClass *NewSrc2ConstraintRC = NewDstConstraintRC;
 
-        // If we can't reassign it, we'd need to introduce a different copy
-        // which is likely worse than the copy we'd be saving.
-        continue;
-      }
+      assert(NewSrc2ConstraintRC == TII.getRegClass(TII.get(AGPROp),
+                                                    Src2->getOperandNo(), &TRI,
+                                                    MF) &&
+             "expected src2 and dst to have same class constraint");
 
-      const TargetRegisterClass *Src2VirtRegRC =
-          MRI.getRegClass(Src2->getReg());
+      Register Src2Reg = Src2->getReg(); // XXX: does this support inline imm
+      const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2Reg);
+
+      SmallVector<MachineInstr *, 4> DstRewriteCandidates;
 
       // We've found av = COPY (MFMA), and need to verify that we can trivially
       // rewrite src2 to use the new AGPR. If we can't trivially replace it,
@@ -191,35 +241,128 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction 
&MF) const {
       // first place, as well as need to assign another register, and need to
       // figure out where to put them. The live range splitting is smarter than
       // anything we're doing here, so trust it did something reasonable.
-      const TargetRegisterClass *Src2ExceptRC =
-          recomputeRegClassExceptRewritable(Src2->getReg(), Src2VirtRegRC,
-                                            VirtRegRC);
-      if (!Src2ExceptRC) {
-        LLVM_DEBUG(dbgs() << "Could not recompute the regclass\n");
+      const TargetRegisterClass *DstExceptRC =
+          recomputeRegClassExceptRewritable(MFMADstReg, DstVirtRegRC, 
VirtRegRC,
+                                            DstRewriteCandidates);
+      if (!DstExceptRC) {
+        LLVM_DEBUG(dbgs() << "Could not recompute the regclass of "
+                          << printReg(MFMADstReg, &TRI) << '\n');
         continue;
       }
 
-      const TargetRegisterClass *NewSrc2ConstraintRC =
-          TII.getRegClass(TII.get(AGPROp), Src2->getOperandNo(), &TRI, MF);
-
       // Try to constrain src2 to the replacement instruction candidate's
       // register class.
-      const TargetRegisterClass *NewSrc2RC =
-          TRI.getCommonSubClass(Src2ExceptRC, NewSrc2ConstraintRC);
-      if (!NewSrc2RC) {
-        LLVM_DEBUG(dbgs() << "Other uses of " << printReg(Src2->getReg(), &TRI)
+      const TargetRegisterClass *NewDstRC =
+          TRI.getCommonSubClass(DstExceptRC, NewDstConstraintRC);
+      if (!NewDstRC) {
+        LLVM_DEBUG(dbgs() << "Other uses of " << printReg(MFMADstReg, &TRI)
                           << " are incompatible with replacement class\n");
         continue;
       }
 
+      // If the inputs are tied and the same register, we can shortcut and
+      // directly replace the register.
+      if (Src2->getReg() != MFMADstReg) {
+        // If src2 and dst are different registers, we need to also reassign 
the
+        // input to an available AGPR if it is compatible with all other uses.
+        //
+        // If we can't reassign it, we'd need to introduce a different copy
+        // which is likely worse than the copy we'd be saving.
+        SmallVector<MachineInstr *, 4> Src2RewriteCandidates;
+        const TargetRegisterClass *Src2ExceptRC =
+            recomputeRegClassExceptRewritable(Src2Reg, Src2RC, VirtRegRC,
+                                              Src2RewriteCandidates);
+        if (!Src2ExceptRC) {
+          LLVM_DEBUG(dbgs() << "Could not recompute the regclass of "
+                            << printReg(Src2Reg, &TRI) << '\n');
+          continue;
+        }
+
+        const TargetRegisterClass *NewSrc2RC =
+            TRI.getCommonSubClass(Src2ExceptRC, NewSrc2ConstraintRC);
+        if (!NewSrc2RC) {
+          LLVM_DEBUG(dbgs() << "Other uses of " << printReg(Src2Reg, &TRI)
+                            << " are incompatible with AGPR replacement\n");
+          continue;
+        }
+
+        // It's likely that the MFMA is used in sequence with other MFMAs; if 
we
+        // cannot migrate the full use/def chain of MFMAs, we would need to
+        // introduce intermediate copies somewhere. So we only make the
+        // transform if all the interfering MFMAs can also be migrated. Collect
+        // the set of rewritable MFMAs and check if we can assign an AGPR at
+        // that point.
+        //
+        // If any of the MFMAs aren't reassignable, we give up and rollback to
+        // the original register assignments.
+
+        using RecoloringStack =
+            SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
+
+        SmallSetVector<Register, 4> InterferingRegs;
+
+        // Make sure we reassign the MFMA we found the copy from first. We want
+        // to ensure dst ends up in the physreg we were originally copying to.
+        InterferingRegs.insert(MFMADstReg);
+
+        RecoloringStack TentativeReassignments;
+
+        for (MachineInstr *RewriteCandidate : Src2RewriteCandidates) {
+          MachineOperand *CandDst =
+              TII.getNamedOperand(*RewriteCandidate, AMDGPU::OpName::vdst);
+          MachineOperand *CandSrc2 =
+              TII.getNamedOperand(*RewriteCandidate, AMDGPU::OpName::src2);
+
+          InterferingRegs.insert(CandDst->getReg());
+          if (CandDst->getReg() != CandSrc2->getReg())
+            InterferingRegs.insert(CandSrc2->getReg());
+        }
+
+        for (Register InterferingReg : InterferingRegs) {
+          LiveInterval &LI = LIS.getInterval(InterferingReg);
+          TentativeReassignments.push_back({&LI, VRM.getPhys(InterferingReg)});
+          LRM.unassign(LI);
+        }
+
+        if (!attemptReassignmentsToAGPR(InterferingRegs, PhysReg)) {
+          // Roll back the register assignments to the original state.
+          for (auto [LI, OldAssign] : TentativeReassignments) {
+            if (VRM.hasPhys(LI->reg()))
+              LRM.unassign(*LI);
+            LRM.assign(*LI, OldAssign);
+          }
+
+          continue;
+        }
+
+        // Fixup the register classes of the virtual registers now that we've
+        // committed to the reassignments.
+        for (Register InterferingReg : InterferingRegs) {
+          const TargetRegisterClass *EquivalentAGPRRegClass =
+              TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
+          MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
+        }
+
+        for (MachineInstr *RewriteCandidate : Src2RewriteCandidates) {
+          int NewMFMAOp =
+              AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
+          RewriteCandidate->setDesc(TII.get(NewMFMAOp));
+        }
+
+        // We likely left an identity copy behind after assignment; let
+        // VirtRegRewriter deal with it later.
+        MadeChange = true;
+        continue;
+      }
+
       MRI.setRegClass(VReg, AssignedRC);
-      MRI.setRegClass(Src2->getReg(), NewSrc2RC);
+      MRI.setRegClass(MFMADstReg, NewDstRC);
 
-      CopySrcMI->setDesc(TII.get(AGPROp));
+      MFMA->setDesc(TII.get(AGPROp));
 
       // Perform replacement of the register, rewriting the rewritable uses.
       for (MachineInstr &UseMI :
-           make_early_inc_range(MRI.reg_instructions(CopySrcReg))) {
+           make_early_inc_range(MRI.reg_instructions(MFMADstReg))) {
         if (TII.isMAI(UseMI)) {
           // Note the register we need to rewrite may still appear in 
src0/src1,
           // but that's fine since those can use A or V anyway.
@@ -228,10 +371,10 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction 
&MF) const {
             UseMI.setDesc(TII.get(ReplacementOp));
         }
 
-        UseMI.substituteRegister(CopySrcReg, VReg, AMDGPU::NoSubRegister, TRI);
+        UseMI.substituteRegister(MFMADstReg, VReg, AMDGPU::NoSubRegister, TRI);
       }
 
-      LLVM_DEBUG(dbgs() << "Replaced VGPR MFMA with AGPR: " << *CopySrcMI);
+      LLVM_DEBUG(dbgs() << "Replaced VGPR MFMA with AGPR: " << *MFMA);
 
       // We left behind an identity copy, so delete it.
       LIS.RemoveMachineInstrFromMaps(*DefMI);
@@ -242,7 +385,7 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction 
&MF) const {
       // We don't need the liveness information anymore, so don't bother
       // updating the intervals. Just delete the stale information.
       // TODO: Is it worth preserving these?
-      LIS.removeInterval(CopySrcReg);
+      LIS.removeInterval(MFMADstReg);
       LIS.removeInterval(VReg);
       LIS.createAndComputeVirtRegInterval(VReg);
 
@@ -256,6 +399,7 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction 
&MF) const {
 class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
 public:
   static char ID;
+  RegisterClassInfo RegClassInfo;
 
   AMDGPURewriteAGPRCopyMFMALegacy() : MachineFunctionPass(ID) {
     initializeAMDGPURewriteAGPRCopyMFMALegacyPass(
@@ -301,11 +445,13 @@ bool 
AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
   if (skipFunction(MF.getFunction()))
     return false;
 
+  RegClassInfo.runOnMachineFunction(MF);
+
   auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
   auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
   auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
 
-  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS);
+  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, RegClassInfo);
   return Impl.run(MF);
 }
 
@@ -315,8 +461,10 @@ AMDGPURewriteAGPRCopyMFMAPass::run(MachineFunction &MF,
   VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
   LiveRegMatrix &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(MF);
   LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
+  RegisterClassInfo RegClassInfo;
+  RegClassInfo.runOnMachineFunction(MF);
 
-  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS);
+  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, RegClassInfo);
   if (!Impl.run(MF))
     return PreservedAnalyses::all();
   auto PA = getMachineFunctionPassPreservedAnalyses();
diff --git 
a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
 
b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
index fe6beceb0a682..2620acaf8770a 100644
--- 
a/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
+++ 
b/llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir
@@ -209,15 +209,14 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; CHECK-NEXT:   liveins: $vcc, $vgpr2_vgpr3
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable 
$vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
-  ; CHECK-NEXT:   early-clobber renamable 
$vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr2_vgpr3, $vgpr2_vgpr3, 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15,
 0, 0, 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   renamable $agpr16_agpr17 = GLOBAL_LOAD_DWORDX2 undef 
renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
+  ; CHECK-NEXT:   early-clobber renamable 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
 = V_MFMA_F32_32X32X8F16_e64 $vgpr2_vgpr3, $vgpr2_vgpr3, 
$agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/149027
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Handle rewriting non-tied MFMA to AGPR form (PR #149027)

Reply via email to