https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/155595

>From a6e2e0d83c2724f04313372df0deda5d1f889ed6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Wed, 27 Aug 2025 19:39:38 +0900
Subject: [PATCH 1/2] AMDGPU: Fix DPP combiner using isOperandLegal on
 incomplete inst

It is not safe to use isOperandLegal on an instruction that does
not have a complete set of operands. Unforunately the APIs are
not set up in a convenient way to speculatively check if an instruction
will be legal in a hypothetical instruction. Build all the operands
and then verify they are legal after. This is clumsy, we should have
a more direct check for will these operands give a legal instruction.

This seems to fix a missed optimization in the gfx11 test. The
fold was firing for gfx1150, but not gfx1100. Both should support
vop3 literals so I'm not sure why it wasn't working before.
---
 llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp      | 36 ++++++++++---------
 .../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 18 ++++------
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp 
b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 184929a5a50f6..81582613e93ae 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -250,7 +250,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
       ++NumOperands;
     }
     if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
-      if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
+      if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::sdst)) {
         DPPInst.add(*SDst);
         ++NumOperands;
       }
@@ -296,11 +296,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
     auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
     assert(Src0);
     int Src0Idx = NumOperands;
-    if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
-      LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
-      Fail = true;
-      break;
-    }
+
     DPPInst.add(*Src0);
     DPPInst->getOperand(NumOperands).setIsKill(false);
     ++NumOperands;
@@ -319,7 +315,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
     }
     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
     if (Src1) {
-      int OpNum = NumOperands;
+      assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1) &&
+             "dpp version of instruction missing src1");
       // If subtarget does not support SGPRs for src1 operand then the
       // requirements are the same as for src0. We check src0 instead because
       // pseudos are shared between subtargets and allow SGPR for src1 on all.
@@ -327,13 +324,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
         assert(getOperandSize(*DPPInst, Src0Idx, *MRI) ==
                    getOperandSize(*DPPInst, NumOperands, *MRI) &&
                "Src0 and Src1 operands should have the same size");
-        OpNum = Src0Idx;
-      }
-      if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) {
-        LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
-        Fail = true;
-        break;
       }
+
       DPPInst.add(*Src1);
       ++NumOperands;
     }
@@ -349,9 +341,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
     }
     auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
     if (Src2) {
-      if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
-          !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
-        LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
+      if (!AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src2)) {
+        LLVM_DEBUG(dbgs() << "  failed: dpp does not have src2\n");
         Fail = true;
         break;
       }
@@ -431,6 +422,19 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
     DPPInst.addImm(CombBCZ ? 1 : 0);
+
+    for (AMDGPU::OpName Op :
+         {AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
+      int OpIdx = AMDGPU::getNamedOperandIdx(DPPOp, Op);
+      if (OpIdx == -1)
+        break;
+
+      if (!TII->isOperandLegal(*DPPInst, OpIdx)) {
+        LLVM_DEBUG(dbgs() << "  failed: src operand is illegal\n");
+        Fail = true;
+        break;
+      }
+    }
   } while (false);
 
   if (Fail) {
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir 
b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fb20e72a77103..3725384e885ee 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,6 +1,6 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 
-run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s 
-check-prefixes=GCN,GFX1100
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 
-run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s 
-check-prefixes=GCN,GFX1150
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 
-run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s 
-check-prefixes=GCN,GFX1150
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 
-run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s 
-check-prefixes=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 
-run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s 
-check-prefixes=GCN
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 
-run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s 
-check-prefixes=GCN
 
 ---
 
@@ -8,8 +8,7 @@
 # GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, 
%5, 1, 1, 15, 15, 1, implicit $exec
 # GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 
15, 1, implicit $mode, implicit $exec
 # GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit 
$mode, implicit $exec
-# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit 
$mode, implicit $exec
-# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 
15, 15, 1, implicit $mode, implicit $exec
+# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 
15, 1, implicit $mode, implicit $exec
 name: vop3
 tracksRegLiveness: true
 body:             |
@@ -39,12 +38,9 @@ body:             |
 
 # GCN-LABEL: name: vop3_sgpr_src1
 # GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 
15, 1, implicit $mode, implicit $exec
-# GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit 
$mode, implicit $exec
-# GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 
15, 15, 1, implicit $mode, implicit $exec
-# GFX1100: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit 
$mode, implicit $exec
-# GFX1150: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 
15, 15, 1, implicit $mode, implicit $exec
-# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit 
$mode, implicit $exec
-# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 
15, 15, 1, implicit $mode, implicit $exec
+# GCN: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 
15, 1, implicit $mode, implicit $exec
+# GCN: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 
15, 1, implicit $mode, implicit $exec
+# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 
15, 1, implicit $mode, implicit $exec
 # GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit 
$mode, implicit $exec
 name: vop3_sgpr_src1
 tracksRegLiveness: true

>From b8551c4fffa838d04891c30b2b444a849ff6dcb3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Thu, 28 Aug 2025 13:41:28 +0900
Subject: [PATCH 2/2] Improve debug printing and add comment

---
 llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp 
b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 81582613e93ae..06186ab4e1b2d 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -423,14 +423,19 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
     DPPInst.addImm(CombBCZ ? 1 : 0);
 
-    for (AMDGPU::OpName Op :
-         {AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
-      int OpIdx = AMDGPU::getNamedOperandIdx(DPPOp, Op);
+    constexpr AMDGPU::OpName Srcs[] = {
+        AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
+
+    // FIXME: isOperandLegal expects to operate on an completely built
+    // instruction. We should have better legality APIs to check if the
+    // candidate operands will be legal without building the instruction first.
+    for (auto [I, OpName] : enumerate(Srcs)) {
+      int OpIdx = AMDGPU::getNamedOperandIdx(DPPOp, OpName);
       if (OpIdx == -1)
         break;
 
       if (!TII->isOperandLegal(*DPPInst, OpIdx)) {
-        LLVM_DEBUG(dbgs() << "  failed: src operand is illegal\n");
+        LLVM_DEBUG(dbgs() << "  failed: src" << I << " operand is illegal\n");
         Fail = true;
         break;
       }

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to