https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127485
>From d978a9636ea12626dd7650efffba63fe8a91e1a4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Mon, 17 Feb 2025 17:18:27 +0700 Subject: [PATCH] AMDGPU: Handle subregister uses in SIFoldOperands constant folding --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 57 +++++++++++-------- .../AMDGPU/constant-fold-imm-immreg.mir | 34 +++++++++++ 2 files changed, 67 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 999553bfaff38..8492bb2c3518b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -123,7 +123,7 @@ class SIFoldOperandsImpl { SmallVectorImpl<FoldCandidate> &FoldList, SmallVectorImpl<MachineInstr *> &CopiesToReplace) const; - MachineOperand *getImmOrMaterializedImm(MachineOperand &Op) const; + std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const; bool tryConstantFoldOp(MachineInstr *MI) const; bool tryFoldCndMask(MachineInstr &MI) const; bool tryFoldZeroHighBits(MachineInstr &MI) const; @@ -1298,21 +1298,22 @@ static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { MI.removeOperand(I); } -MachineOperand * +std::optional<int64_t> SIFoldOperandsImpl::getImmOrMaterializedImm(MachineOperand &Op) const { - // If this has a subregister, it obviously is a register source. - if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister || - !Op.getReg().isVirtual()) - return &Op; + if (Op.isImm()) + return Op.getImm(); - MachineInstr *Def = MRI->getVRegDef(Op.getReg()); + if (!Op.isReg() || !Op.getReg().isVirtual()) + return std::nullopt; + + const MachineInstr *Def = MRI->getVRegDef(Op.getReg()); if (Def && Def->isMoveImmediate()) { - MachineOperand &ImmSrc = Def->getOperand(1); + const MachineOperand &ImmSrc = Def->getOperand(1); if (ImmSrc.isImm()) - return &ImmSrc; + return TII->extractSubregFromImm(ImmSrc.getImm(), Op.getSubReg()); } - return &Op; + return std::nullopt; } // Try to simplify operations with a constant that may appear after instruction @@ -1327,12 +1328,14 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return false; - MachineOperand *Src0 = getImmOrMaterializedImm(MI->getOperand(Src0Idx)); + + MachineOperand *Src0 = &MI->getOperand(Src0Idx); + std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0); if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 || Opc == AMDGPU::S_NOT_B32) && - Src0->isImm()) { - MI->getOperand(1).ChangeToImmediate(~Src0->getImm()); + Src0Imm) { + MI->getOperand(1).ChangeToImmediate(~*Src0Imm); mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32))); return true; } @@ -1340,17 +1343,19 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return false; - MachineOperand *Src1 = getImmOrMaterializedImm(MI->getOperand(Src1Idx)); - if (!Src0->isImm() && !Src1->isImm()) + MachineOperand *Src1 = &MI->getOperand(Src1Idx); + std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1); + + if (!Src0Imm && !Src1Imm) return false; // and k0, k1 -> v_mov_b32 (k0 & k1) // or k0, k1 -> v_mov_b32 (k0 | k1) // xor k0, k1 -> v_mov_b32 (k0 ^ k1) - if (Src0->isImm() && Src1->isImm()) { + if (Src0Imm && Src1Imm) { int32_t NewImm; - if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm())) + if (!evalBinaryInstruction(Opc, NewImm, *Src0Imm, *Src1Imm)) return false; bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg()); @@ -1366,12 +1371,13 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const { if (!MI->isCommutable()) return false; - if (Src0->isImm() && !Src1->isImm()) { + if (Src0Imm && !Src1Imm) { std::swap(Src0, Src1); std::swap(Src0Idx, Src1Idx); + std::swap(Src0Imm, Src1Imm); } - int32_t Src1Val = static_cast<int32_t>(Src1->getImm()); + int32_t Src1Val = static_cast<int32_t>(*Src1Imm); if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::V_OR_B32_e32 || Opc == AMDGPU::S_OR_B32) { @@ -1428,9 +1434,12 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const { MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (!Src1->isIdenticalTo(*Src0)) { - auto *Src0Imm = getImmOrMaterializedImm(*Src0); - auto *Src1Imm = getImmOrMaterializedImm(*Src1); - if (!Src1Imm->isIdenticalTo(*Src0Imm)) + std::optional<int64_t> Src1Imm = getImmOrMaterializedImm(*Src1); + if (!Src1Imm) + return false; + + std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(*Src0); + if (!Src0Imm || *Src0Imm != *Src1Imm) return false; } @@ -1463,8 +1472,8 @@ bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const { MI.getOpcode() != AMDGPU::V_AND_B32_e32) return false; - MachineOperand *Src0 = getImmOrMaterializedImm(MI.getOperand(1)); - if (!Src0->isImm() || Src0->getImm() != 0xffff) + std::optional<int64_t> Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); + if (!Src0Imm || *Src0Imm != 0xffff) return false; Register Src1 = MI.getOperand(2).getReg(); diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir index 39b5076ebe5ac..807eaf2160b3c 100644 --- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir +++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir @@ -927,3 +927,37 @@ body: | S_ENDPGM 0, implicit %3 ... + +--- +name: constant_s_xor_b32_uses_subreg +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: constant_s_xor_b32_uses_subreg + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 47 + ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]] + %0:sreg_64 = S_MOV_B64 32 + %1:sreg_64 = S_MOV_B64 15 + %2:sgpr_32 = S_XOR_B32 %0.sub0, %1.sub0, implicit-def dead $scc + %3:sgpr_32 = S_XOR_B32 %0.sub1, %1.sub1, implicit-def dead $scc + S_ENDPGM 0, implicit %2, implicit %3 + +... + +--- +name: constant_v_or_b32_uses_subreg +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: constant_v_or_b32_uses_subreg + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 268435455, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]] + %0:vreg_64 = V_MOV_B64_PSEUDO 18446744069683019775, implicit $exec + %1:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec + %2:vgpr_32 = V_OR_B32_e32 %0.sub0, %1.sub0, implicit $exec + %3:vgpr_32 = V_OR_B32_e32 %0.sub1, %1.sub1, implicit $exec + S_ENDPGM 0, implicit %2, implicit %3 + +... _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits