https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112882
>From 4675f79f28222cef60d1607acb1b682ca3363eb6 Mon Sep 17 00:00:00 2001 From: Petar Avramovic <petar.avramo...@amd.com> Date: Wed, 30 Oct 2024 15:37:59 +0100 Subject: [PATCH] AMDGPU/GlobalISel: RegBankLegalize rules for load Add IDs for bit widths that cover multiple LLTs: B32, B64, etc. Add a "Predicate" wrapper class for bool predicate functions, used to write readable rules; predicates can be combined using &&, || and !. Add lowering for splitting and widening loads. Write rules for loads so that the existing MIR tests from the old regbankselect stay unchanged. (Standalone sketches of the predicate jump-table encoding and of the split-load offset bookkeeping are appended after the patch.) --- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 287 +++++++++++++++- .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 5 + .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 309 ++++++++++++++++- .../AMDGPU/AMDGPURegBankLegalizeRules.h | 65 +++- .../AMDGPU/GlobalISel/regbankselect-load.mir | 320 +++++++++++++++--- .../GlobalISel/regbankselect-zextload.mir | 9 +- 6 files changed, 929 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 15ccf1a38af9a5..19d8d466e3b12e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -36,6 +36,83 @@ void RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) { lower(MI, Mapping, WaterfallSgprs); } +void RegBankLegalizeHelper::splitLoad(MachineInstr &MI, + ArrayRef<LLT> LLTBreakdown, LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register Base = MI.getOperand(1).getReg(); + LLT PtrTy = MRI.getType(Base); + const RegisterBank *PtrRB = MRI.getRegBankOrNull(Base); + LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); + SmallVector<Register, 4> LoadPartRegs; + + unsigned ByteOffset = 0; + for (LLT PartTy : LLTBreakdown) { + Register BasePlusOffset; + if (ByteOffset == 0) { + BasePlusOffset = Base; + } else { + auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset); + BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0); + } + auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy); + auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO); + LoadPartRegs.push_back(LoadPart.getReg(0)); + ByteOffset += PartTy.getSizeInBytes(); + } + + if (!MergeTy.isValid()) { + // Loads are all the same size; concat or merge them together. + B.buildMergeLikeInstr(Dst, LoadPartRegs); + } else { + // Loads are not all the same size; unmerge them into smaller pieces + // of MergeTy type, then merge the pieces into Dst. 
+ SmallVector<Register, 4> MergeTyParts; + for (Register Reg : LoadPartRegs) { + if (MRI.getType(Reg) == MergeTy) { + MergeTyParts.push_back(Reg); + } else { + auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, Reg); + for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) + MergeTyParts.push_back(Unmerge.getReg(i)); + } + } + B.buildMergeLikeInstr(Dst, MergeTyParts); + } + MI.eraseFromParent(); +} + +void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy, + LLT MergeTy) { + MachineFunction &MF = B.getMF(); + assert(MI.getNumMemOperands() == 1); + MachineMemOperand &BaseMMO = **MI.memoperands_begin(); + Register Dst = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst); + Register Base = MI.getOperand(1).getReg(); + + MachineMemOperand *WideMMO = MF.getMachineMemOperand(&BaseMMO, 0, WideTy); + auto WideLoad = B.buildLoad({DstRB, WideTy}, Base, *WideMMO); + + if (WideTy.isScalar()) { + B.buildTrunc(Dst, WideLoad); + } else { + SmallVector<Register, 4> MergeTyParts; + auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad); + + LLT DstTy = MRI.getType(Dst); + unsigned NumElts = DstTy.getSizeInBits() / MergeTy.getSizeInBits(); + for (unsigned i = 0; i < NumElts; ++i) { + MergeTyParts.push_back(Unmerge.getReg(i)); + } + B.buildMergeLikeInstr(Dst, MergeTyParts); + } + MI.eraseFromParent(); +} + void RegBankLegalizeHelper::lower(MachineInstr &MI, const RegBankLLTMapping &Mapping, SmallSet<Register, 4> &WaterfallSgprs) { @@ -114,6 +191,50 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI, MI.eraseFromParent(); break; } + case SplitLoad: { + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = DstTy.getSizeInBits(); + // Split evenly into 128-bit loads. + if (Size > 128) { + LLT B128; + if (DstTy.isVector()) { + LLT EltTy = DstTy.getElementType(); + B128 = LLT::fixed_vector(128 / EltTy.getSizeInBits(), EltTy); + } else { + B128 = LLT::scalar(128); + } + if (Size / 128 == 2) + splitLoad(MI, {B128, B128}); + if (Size / 128 == 4) + splitLoad(MI, {B128, B128, B128, B128}); + } + // Uneven split into 64-bit and 32-bit pieces. + else if (DstTy == S96) + splitLoad(MI, {S64, S32}, S32); + else if (DstTy == V3S32) + splitLoad(MI, {V2S32, S32}, S32); + else if (DstTy == V6S16) + splitLoad(MI, {V4S16, V2S16}, V2S16); + else { + MI.dump(); + llvm_unreachable("SplitLoad type not supported"); + } + break; + } + case WidenLoad: { + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + if (DstTy == S96) + widenLoad(MI, S128); + else if (DstTy == V3S32) + widenLoad(MI, V4S32, S32); + else if (DstTy == V6S16) + widenLoad(MI, V8S16, V2S16); + else { + MI.dump(); + llvm_unreachable("WidenLoad type not supported"); + } + break; + } } // TODO: executeInWaterfallLoop(... 
WaterfallSgprs) @@ -137,13 +258,73 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMapingApplyID ID) { case Sgpr64: case Vgpr64: return LLT::scalar(64); - + case SgprP1: + case VgprP1: + return LLT::pointer(1, 64); + case SgprP3: + case VgprP3: + return LLT::pointer(3, 32); + case SgprP4: + case VgprP4: + return LLT::pointer(4, 64); + case SgprP5: + case VgprP5: + return LLT::pointer(5, 32); case SgprV4S32: case VgprV4S32: case UniInVgprV4S32: return LLT::fixed_vector(4, 32); - case VgprP1: - return LLT::pointer(1, 64); + default: + return LLT(); + } +} + +LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMapingApplyID ID, LLT Ty) { + switch (ID) { + case SgprB32: + case VgprB32: + case UniInVgprB32: + if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) || + Ty == LLT::pointer(3, 32) || Ty == LLT::pointer(5, 32) || + Ty == LLT::pointer(6, 32)) + return Ty; + return LLT(); + case SgprB64: + case VgprB64: + case UniInVgprB64: + if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || + Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(0, 64) || + Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64)) + return Ty; + return LLT(); + case SgprB96: + case VgprB96: + case UniInVgprB96: + if (Ty == LLT::scalar(96) || Ty == LLT::fixed_vector(3, 32) || + Ty == LLT::fixed_vector(6, 16)) + return Ty; + return LLT(); + case SgprB128: + case VgprB128: + case UniInVgprB128: + if (Ty == LLT::scalar(128) || Ty == LLT::fixed_vector(4, 32) || + Ty == LLT::fixed_vector(2, 64)) + return Ty; + return LLT(); + case SgprB256: + case VgprB256: + case UniInVgprB256: + if (Ty == LLT::scalar(256) || Ty == LLT::fixed_vector(8, 32) || + Ty == LLT::fixed_vector(4, 64) || Ty == LLT::fixed_vector(16, 16)) + return Ty; + return LLT(); + case SgprB512: + case VgprB512: + case UniInVgprB512: + if (Ty == LLT::scalar(512) || Ty == LLT::fixed_vector(16, 32) || + Ty == LLT::fixed_vector(8, 64)) + return Ty; + return LLT(); default: return LLT(); } @@ -158,10 +339,26 @@ RegBankLegalizeHelper::getRBFromID(RegBankLLTMapingApplyID ID) { case Sgpr16: case Sgpr32: case Sgpr64: + case SgprP1: + case SgprP3: + case SgprP4: + case SgprP5: case SgprV4S32: + case SgprB32: + case SgprB64: + case SgprB96: + case SgprB128: + case SgprB256: + case SgprB512: case UniInVcc: case UniInVgprS32: case UniInVgprV4S32: + case UniInVgprB32: + case UniInVgprB64: + case UniInVgprB96: + case UniInVgprB128: + case UniInVgprB256: + case UniInVgprB512: case Sgpr32Trunc: case Sgpr32AExt: case Sgpr32AExtBoolInReg: @@ -171,7 +368,16 @@ RegBankLegalizeHelper::getRBFromID(RegBankLLTMapingApplyID ID) { case Vgpr32: case Vgpr64: case VgprP1: + case VgprP3: + case VgprP4: + case VgprP5: case VgprV4S32: + case VgprB32: + case VgprB64: + case VgprB96: + case VgprB128: + case VgprB256: + case VgprB512: return VgprRB; default: @@ -197,17 +403,42 @@ void RegBankLegalizeHelper::applyMappingDst( case Sgpr16: case Sgpr32: case Sgpr64: + case SgprP1: + case SgprP3: + case SgprP4: + case SgprP5: case SgprV4S32: case Vgpr32: case Vgpr64: case VgprP1: + case VgprP3: + case VgprP4: + case VgprP5: case VgprV4S32: { assert(Ty == getTyFromID(MethodIDs[OpIdx])); assert(RB == getRBFromID(MethodIDs[OpIdx])); break; } - // uniform in vcc/vgpr: scalars and vectors + // sgpr and vgpr B-types + case SgprB32: + case SgprB64: + case SgprB96: + case SgprB128: + case SgprB256: + case SgprB512: + case VgprB32: + case VgprB64: + case VgprB96: + case VgprB128: + case VgprB256: + case VgprB512: { + assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty)); + assert(RB == 
getRBFromID(MethodIDs[OpIdx])); + break; + } + + // uniform in vcc/vgpr: scalars, vectors and B-types case UniInVcc: { assert(Ty == S1); assert(RB == SgprRB); @@ -227,6 +458,20 @@ void RegBankLegalizeHelper::applyMappingDst( buildReadAnyLane(B, Reg, NewVgprDst, RBI); break; } + case UniInVgprB32: + case UniInVgprB64: + case UniInVgprB96: + case UniInVgprB128: + case UniInVgprB256: + case UniInVgprB512: { + assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty)); + assert(RB == SgprRB); + + Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty}); + Op.setReg(NewVgprDst); + AMDGPU::buildReadAnyLane(B, Reg, NewVgprDst, RBI); + break; + } // sgpr trunc case Sgpr32Trunc: { @@ -278,16 +523,34 @@ void RegBankLegalizeHelper::applyMappingSrc( case Sgpr16: case Sgpr32: case Sgpr64: + case SgprP1: + case SgprP3: + case SgprP4: + case SgprP5: case SgprV4S32: { assert(Ty == getTyFromID(MethodIDs[i])); assert(RB == getRBFromID(MethodIDs[i])); break; } + // sgpr B-types + case SgprB32: + case SgprB64: + case SgprB96: + case SgprB128: + case SgprB256: + case SgprB512: { + assert(Ty == getBTyFromID(MethodIDs[i], Ty)); + assert(RB == getRBFromID(MethodIDs[i])); + break; + } // vgpr scalars, pointers and vectors case Vgpr32: case Vgpr64: case VgprP1: + case VgprP3: + case VgprP4: + case VgprP5: case VgprV4S32: { assert(Ty == getTyFromID(MethodIDs[i])); if (RB != VgprRB) { @@ -296,6 +559,20 @@ void RegBankLegalizeHelper::applyMappingSrc( } break; } + // vgpr B-types + case VgprB32: + case VgprB64: + case VgprB96: + case VgprB128: + case VgprB256: + case VgprB512: { + assert(Ty == getBTyFromID(MethodIDs[i], Ty)); + if (RB != VgprRB) { + auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg); + Op.setReg(CopyToVgpr.getReg(0)); + } + break; + } // sgpr and vgpr scalars with extend case Sgpr32AExt: { @@ -368,7 +645,7 @@ void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) { // We accept all types that can fit in some register class. // Uniform G_PHIs have all sgpr registers. // Divergent G_PHIs have vgpr dst but inputs can be sgpr or vgpr. 
- if (Ty == LLT::scalar(32)) { + if (Ty == LLT::scalar(32) || Ty == LLT::pointer(4, 64)) { return; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h index 7b23d222a1e756..9f6760cf5e6b76 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -90,6 +90,7 @@ class RegBankLegalizeHelper { SmallSet<Register, 4> &SgprOperandRegs); LLT getTyFromID(RegBankLLTMapingApplyID ID); + LLT getBTyFromID(RegBankLLTMapingApplyID ID, LLT Ty); const RegisterBank *getRBFromID(RegBankLLTMapingApplyID ID); @@ -102,6 +103,10 @@ class RegBankLegalizeHelper { const SmallVectorImpl<RegBankLLTMapingApplyID> &MethodIDs, SmallSet<Register, 4> &SgprWaterfallOperandRegs); + void splitLoad(MachineInstr &MI, ArrayRef<LLT> LLTBreakdown, + LLT MergeTy = LLT()); + void widenLoad(MachineInstr &MI, LLT WideTy, LLT MergeTy = LLT()); + void lower(MachineInstr &MI, const RegBankLLTMapping &Mapping, SmallSet<Register, 4> &SgprWaterfallOperandRegs); }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index c54e5ff579f1d9..d90822750f76fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -14,9 +14,11 @@ //===----------------------------------------------------------------------===// #include "AMDGPURegBankLegalizeRules.h" +#include "AMDGPUInstrInfo.h" #include "GCNSubtarget.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/Support/AMDGPUAddrSpace.h" using namespace llvm; using namespace AMDGPU; @@ -47,6 +49,24 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg) == LLT::scalar(64); case P1: return MRI.getType(Reg) == LLT::pointer(1, 64); + case P3: + return MRI.getType(Reg) == LLT::pointer(3, 32); + case P4: + return MRI.getType(Reg) == LLT::pointer(4, 64); + case P5: + return MRI.getType(Reg) == LLT::pointer(5, 32); + case B32: + return MRI.getType(Reg).getSizeInBits() == 32; + case B64: + return MRI.getType(Reg).getSizeInBits() == 64; + case B96: + return MRI.getType(Reg).getSizeInBits() == 96; + case B128: + return MRI.getType(Reg).getSizeInBits() == 128; + case B256: + return MRI.getType(Reg).getSizeInBits() == 256; + case B512: + return MRI.getType(Reg).getSizeInBits() == 512; case UniS1: return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg); @@ -56,6 +76,26 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg); case UniS64: return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg); + case UniP1: + return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg); + case UniP3: + return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg); + case UniP4: + return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg); + case UniP5: + return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg); + case UniB32: + return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg); + case UniB64: + return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg); + case UniB96: + return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg); + case UniB128: + return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg); + case UniB256: + return MRI.getType(Reg).getSizeInBits() == 
256 && MUI.isUniform(Reg); + case UniB512: + return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg); case DivS1: return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg); @@ -65,6 +105,24 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg); case DivP1: return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg); + case DivP3: + return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg); + case DivP4: + return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg); + case DivP5: + return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg); + case DivB32: + return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg); + case DivB64: + return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg); + case DivB96: + return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg); + case DivB128: + return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg); + case DivB256: + return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg); + case DivB512: + return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg); case _: return true; @@ -123,6 +181,22 @@ UniformityLLTOpPredicateID LLTToId(LLT Ty) { return _; } +UniformityLLTOpPredicateID LLTToBId(LLT Ty) { + if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) || + Ty == LLT::pointer(3, 32) || Ty == LLT::pointer(5, 32) || + Ty == LLT::pointer(6, 32)) + return B32; + if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || + Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(1, 64) || + Ty == LLT::pointer(4, 64)) + return B64; + if (Ty == LLT::fixed_vector(3, 32)) + return B96; + if (Ty == LLT::fixed_vector(4, 32)) + return B128; + return _; +} + const RegBankLLTMapping & SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI, @@ -133,7 +207,12 @@ SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI, // returned which results in failure, does not search "Slow Rules". if (FastTypes != NoFastRules) { Register Reg = MI.getOperand(0).getReg(); - int Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg))); + int Slot; + if (FastTypes == StandardB) + Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg))); + else + Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg))); + if (Slot != -1) return MUI.isUniform(Reg) ? Uni[Slot] : Div[Slot]; } @@ -180,7 +259,22 @@ int SetOfRulesForOpcode::getFastPredicateSlot( default: return -1; } - case Vector: + } + case StandardB: { + switch (Ty) { + case B32: + return 0; + case B64: + return 1; + case B96: + return 2; + case B128: + return 3; + default: + return -1; + } + } + case Vector: { switch (Ty) { case S32: return 0; @@ -232,6 +326,126 @@ RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const { return GRules.at(GRulesAlias.at(Opc)); } +// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'. +class Predicate { +public: + struct Elt { + // Save formula composed of Pred, '&&', '||' and '!' as a jump table. + // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C + // Sequences of && and || will be represented by jumps, for example: + // (A && B && ... X) or (A && B && ... X) || Y + // A == true jump to B + // A == false jump to end or Y, result is A(false) or Y + // (A || B || ... X) or (A || B || ... 
X) && Y + // A == true jump to end or Y, result is A(true) or Y + // A == false jump to B + // Notice that when negating an expression, we simply flip Neg on each Pred + // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&). + std::function<bool(const MachineInstr &)> Pred; + bool Neg; // Neg of Pred is calculated before jump + unsigned TJumpOffset; + unsigned FJumpOffset; + }; + + SmallVector<Elt, 8> Expression; + + Predicate(std::function<bool(const MachineInstr &)> Pred) { + Expression.push_back({Pred, false, 1, 1}); + }; + + Predicate(SmallVectorImpl<Elt> &Expr) { Expression.swap(Expr); }; + + bool operator()(const MachineInstr &MI) const { + unsigned Idx = 0; + unsigned ResultIdx = Expression.size(); + bool Result; + do { + Result = Expression[Idx].Pred(MI); + Result = Expression[Idx].Neg ? !Result : Result; + if (Result) { + Idx += Expression[Idx].TJumpOffset; + } else { + Idx += Expression[Idx].FJumpOffset; + } + } while ((Idx != ResultIdx)); + + return Result; + }; + + Predicate operator!() { + SmallVector<Elt, 8> NegExpression; + for (Elt &ExprElt : Expression) { + NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset, + ExprElt.TJumpOffset}); + } + return Predicate(NegExpression); + }; + + Predicate operator&&(Predicate &RHS) { + SmallVector<Elt, 8> AndExpression = Expression; + + unsigned RHSSize = RHS.Expression.size(); + unsigned ResultIdx = Expression.size(); + for (unsigned i = 0; i < ResultIdx; ++i) { + // LHS results in false, whole expression results in false. + if (i + AndExpression[i].FJumpOffset == ResultIdx) + AndExpression[i].FJumpOffset += RHSSize; + } + + AndExpression.append(RHS.Expression); + + return Predicate(AndExpression); + } + + Predicate operator&&(Predicate &&RHS) { + SmallVector<Elt, 8> AndExpression = Expression; + + unsigned RHSSize = RHS.Expression.size(); + unsigned ResultIdx = Expression.size(); + for (unsigned i = 0; i < ResultIdx; ++i) { + // LHS results in false, whole expression results in false. + if (i + AndExpression[i].FJumpOffset == ResultIdx) + AndExpression[i].FJumpOffset += RHSSize; + } + + AndExpression.append(RHS.Expression); + + return Predicate(AndExpression); + } + + Predicate operator||(Predicate &RHS) { + SmallVector<Elt, 8> OrExpression = Expression; + + unsigned RHSSize = RHS.Expression.size(); + unsigned ResultIdx = Expression.size(); + for (unsigned i = 0; i < ResultIdx; ++i) { + // LHS results in true, whole expression results in true. + if (i + OrExpression[i].TJumpOffset == ResultIdx) + OrExpression[i].TJumpOffset += RHSSize; + } + + OrExpression.append(RHS.Expression); + + return Predicate(OrExpression); + } + + Predicate operator||(Predicate &&RHS) { + SmallVector<Elt, 8> OrExpression = Expression; + + unsigned RHSSize = RHS.Expression.size(); + unsigned ResultIdx = Expression.size(); + for (unsigned i = 0; i < ResultIdx; ++i) { + // LHS results in true, whole expression results in true. 
+ if (i + OrExpression[i].TJumpOffset == ResultIdx) + OrExpression[i].TJumpOffset += RHSSize; + } + + OrExpression.append(RHS.Expression); + + return Predicate(OrExpression); + } +}; + // Initialize rules RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) @@ -243,10 +457,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}}); - addRulesForGOpcs({G_XOR, G_OR, G_AND}, Standard) + addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB) .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}}) .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}}) - .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, SplitTo32}); + .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32}); addRulesForGOpcs({G_SHL}, Standard) .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}}) @@ -271,9 +485,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}}); - addRulesForGOpcs({G_SELECT}, Standard) - .Div(S32, {{Vgpr32}, {Vcc, Vgpr32, Vgpr32}}) - .Uni(S32, {{Sgpr32}, {Sgpr32AExtBoolInReg, Sgpr32, Sgpr32}}); + addRulesForGOpcs({G_SELECT}, StandardB) + .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}}) + .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}}); addRulesForGOpcs({G_ANYEXT}).Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}); @@ -289,7 +503,86 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}}) .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}}); - addRulesForGOpcs({G_LOAD}).Any({{DivS32, DivP1}, {{Vgpr32}, {VgprP1}}}); + bool hasUnAlignedLoads = ST->getGeneration() >= AMDGPUSubtarget::GFX12; + bool hasSMRDSmall = ST->hasScalarSubwordLoads(); + + Predicate isAlign16([](const MachineInstr &MI) -> bool { + return (*MI.memoperands_begin())->getAlign() >= Align(16); + }); + + Predicate isAlign4([](const MachineInstr &MI) -> bool { + return (*MI.memoperands_begin())->getAlign() >= Align(4); + }); + + Predicate isAtomicMMO([](const MachineInstr &MI) -> bool { + return (*MI.memoperands_begin())->isAtomic(); + }); + + Predicate isUniMMO([](const MachineInstr &MI) -> bool { + return AMDGPUInstrInfo::isUniformMMO(*MI.memoperands_begin()); + }); + + Predicate isConst([](const MachineInstr &MI) -> bool { + // Address space in the MMO can be different than the address space on the pointer. 
+ const MachineMemOperand *MMO = *MI.memoperands_begin(); + const unsigned AS = MMO->getAddrSpace(); + return AS == AMDGPUAS::CONSTANT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT; + }); + + Predicate isVolatileMMO([](const MachineInstr &MI) -> bool { + return (*MI.memoperands_begin())->isVolatile(); + }); + + Predicate isInvMMO([](const MachineInstr &MI) -> bool { + return (*MI.memoperands_begin())->isInvariant(); + }); + + Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool { + return (*MI.memoperands_begin())->getFlags() & MONoClobber; + }); + + Predicate isNaturalAlignedSmall([](const MachineInstr &MI) -> bool { + const MachineMemOperand *MMO = *MI.memoperands_begin(); + const unsigned MemSize = 8 * MMO->getSize().getValue(); + return (MemSize == 16 && MMO->getAlign() >= Align(2)) || + (MemSize == 8 && MMO->getAlign() >= Align(1)); + }); + + auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) && + (isConst || isInvMMO || isNoClobberMMO); + + // clang-format off + addRulesForGOpcs({G_LOAD}) + .Any({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}}) + .Any({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}}) + .Any({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}}) + .Any({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}}) + .Any({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}}) + + .Any({{DivB32, UniP3}, {{VgprB32}, {VgprP3}}}) + .Any({{{UniB32, UniP3}, isAlign4 && isUL}, {{SgprB32}, {SgprP3}}}) + .Any({{{UniB32, UniP3}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP3}}}) + + .Any({{{DivB256, DivP4}}, {{VgprB256}, {VgprP4}, SplitLoad}}) + .Any({{{UniB32, UniP4}, isNaturalAlignedSmall && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // i8 and i16 load + .Any({{{UniB32, UniP4}, isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) + .Any({{{UniB96, UniP4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasUnAlignedLoads) + .Any({{{UniB96, UniP4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasUnAlignedLoads) + .Any({{{UniB96, UniP4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasUnAlignedLoads) + .Any({{{UniB256, UniP4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}}) + .Any({{{UniB512, UniP4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}}) + .Any({{{UniB32, UniP4}, !isNaturalAlignedSmall || !isUL}, {{UniInVgprB32}, {VgprP4}}}, hasSMRDSmall) // i8 and i16 load + .Any({{{UniB32, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP4}}}) + .Any({{{UniB256, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP4}, SplitLoad}}) + .Any({{{UniB512, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP4}, SplitLoad}}) + + .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}); + + addRulesForGOpcs({G_ZEXTLOAD}) // i8 and i16 zero-extending loads + .Any({{{UniB32, UniP3}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP3}}}) + .Any({{{UniB32, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP4}}}); + // clang-format on addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, Vector) .Div(V4S32, {{VgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 6a5225263669a8..f5acfd847a77fc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -42,14 +42,47 @@ enum UniformityLLTOpPredicateID { // pointers P1, + P3, + P4, + P5, + + UniP1, + UniP3, + UniP4, + UniP5, DivP1, + DivP3, + DivP4, + DivP5, // vectors V2S16, V2S32, V3S32, 
V4S32, + + // B types + B32, + B64, + B96, + B128, + B256, + B512, + + UniB32, + UniB64, + UniB96, + UniB128, + UniB256, + UniB512, + + DivB32, + DivB64, + DivB96, + DivB128, + DivB256, + DivB512, }; // How to apply register bank on register operand. @@ -67,18 +100,43 @@ enum RegBankLLTMapingApplyID { Sgpr16, Sgpr32, Sgpr64, + SgprP1, + SgprP3, + SgprP4, + SgprP5, SgprV4S32, + SgprB32, + SgprB64, + SgprB96, + SgprB128, + SgprB256, + SgprB512, // vgpr scalars, pointers, vectors and B-types Vgpr32, Vgpr64, VgprP1, + VgprP3, + VgprP4, + VgprP5, + VgprB32, + VgprB64, + VgprB96, + VgprB128, + VgprB256, + VgprB512, VgprV4S32, // Dst only modifiers: read-any-lane and truncs UniInVcc, UniInVgprS32, UniInVgprV4S32, + UniInVgprB32, + UniInVgprB64, + UniInVgprB96, + UniInVgprB128, + UniInVgprB256, + UniInVgprB512, Sgpr32Trunc, @@ -100,12 +158,15 @@ enum LoweringMethodID { SplitTo32, Ext32To64, UniCstExt, + SplitLoad, + WidenLoad, }; enum FastRulesTypes { NoFastRules, - Standard, // S16, S32, S64, V2S16 - Vector, // S32, V2S32, V3S32, V4S32 + Standard, // S16, S32, S64, V2S16 + StandardB, // B32, B64, B96, B128 + Vector, // S32, V2S32, V3S32, V4S32 }; struct RegBankLLTMapping { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index b66560710e37e2..85ad51f1b0b894 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -1,7 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -check-prefixes=GCN,GFX7 -# RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s -check-prefixes=GCN,GFX7 -# RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=GCN,GFX12 +# RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=GCN,GFX7 +# RUN: llc -amdgpu-global-isel-new-legality -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=GCN,GFX12 --- | define amdgpu_kernel void @load_global_v8i32_non_uniform(ptr addrspace(1) %in) { @@ -110,6 +109,7 @@ --- name: load_global_v8i32_non_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -119,11 +119,21 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY1]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x 
s32>) from %ir.global.not.uniform.v8i32 + 16, basealign 32, addrspace 1) ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV]] + ; GCN-NEXT: [[READANYLANE1:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV1]] + ; GCN-NEXT: [[READANYLANE2:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV2]] + ; GCN-NEXT: [[READANYLANE3:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV3]] + ; GCN-NEXT: [[READANYLANE4:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV4]] + ; GCN-NEXT: [[READANYLANE5:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV5]] + ; GCN-NEXT: [[READANYLANE6:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV6]] + ; GCN-NEXT: [[READANYLANE7:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV7]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[READANYLANE]](s32), [[READANYLANE1]](s32), [[READANYLANE2]](s32), [[READANYLANE3]](s32), [[READANYLANE4]](s32), [[READANYLANE5]](s32), [[READANYLANE6]](s32), [[READANYLANE7]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>) from %ir.global.not.uniform.v8i32) ... @@ -131,6 +141,7 @@ body: | --- name: load_global_v4i64_non_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -141,11 +152,29 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64, align 32, addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY1]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64, align 32, addrspace 1) ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64 + 16, basealign 32, addrspace 1) ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) + ; GCN-NEXT: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV4]] + ; GCN-NEXT: [[READANYLANE1:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV5]] + ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE]](s32), [[READANYLANE1]](s32) + ; GCN-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GCN-NEXT: [[READANYLANE2:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV6]] + ; GCN-NEXT: [[READANYLANE3:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV7]] + ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE2]](s32), [[READANYLANE3]](s32) + ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GCN-NEXT: [[READANYLANE4:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV8]] + ; GCN-NEXT: 
[[READANYLANE5:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV9]] + ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE4]](s32), [[READANYLANE5]](s32) + ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GCN-NEXT: [[READANYLANE6:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV10]] + ; GCN-NEXT: [[READANYLANE7:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV11]] + ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE6]](s32), [[READANYLANE7]](s32) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>) from %ir.global.not.uniform.v4i64) ... @@ -153,6 +182,7 @@ body: | --- name: load_global_v16i32_non_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -162,17 +192,35 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY1]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 16, basealign 64, addrspace 1) ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 1) ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 48, basealign 64, addrspace 1) ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV]] + ; GCN-NEXT: [[READANYLANE1:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV1]] + ; GCN-NEXT: [[READANYLANE2:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV2]] + ; GCN-NEXT: [[READANYLANE3:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV3]] + ; GCN-NEXT: [[READANYLANE4:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV4]] + ; GCN-NEXT: [[READANYLANE5:%[0-9]+]]:sgpr(s32) = 
G_READANYLANE [[UV5]] + ; GCN-NEXT: [[READANYLANE6:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV6]] + ; GCN-NEXT: [[READANYLANE7:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV7]] + ; GCN-NEXT: [[READANYLANE8:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV8]] + ; GCN-NEXT: [[READANYLANE9:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV9]] + ; GCN-NEXT: [[READANYLANE10:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV10]] + ; GCN-NEXT: [[READANYLANE11:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV11]] + ; GCN-NEXT: [[READANYLANE12:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV12]] + ; GCN-NEXT: [[READANYLANE13:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV13]] + ; GCN-NEXT: [[READANYLANE14:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV14]] + ; GCN-NEXT: [[READANYLANE15:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV15]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<16 x s32>) = G_BUILD_VECTOR [[READANYLANE]](s32), [[READANYLANE1]](s32), [[READANYLANE2]](s32), [[READANYLANE3]](s32), [[READANYLANE4]](s32), [[READANYLANE5]](s32), [[READANYLANE6]](s32), [[READANYLANE7]](s32), [[READANYLANE8]](s32), [[READANYLANE9]](s32), [[READANYLANE10]](s32), [[READANYLANE11]](s32), [[READANYLANE12]](s32), [[READANYLANE13]](s32), [[READANYLANE14]](s32), [[READANYLANE15]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>) from %ir.global.not.uniform.v16i32) ... @@ -180,6 +228,7 @@ body: | --- name: load_global_v8i64_non_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -189,17 +238,51 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY1]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 16, basealign 64, addrspace 1) ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 1) ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 48, basealign 64, addrspace 1) ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) + ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = 
G_UNMERGE_VALUES [[UV]](s64) + ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV8]] + ; GCN-NEXT: [[READANYLANE1:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV9]] + ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE]](s32), [[READANYLANE1]](s32) + ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GCN-NEXT: [[READANYLANE2:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV10]] + ; GCN-NEXT: [[READANYLANE3:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV11]] + ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE2]](s32), [[READANYLANE3]](s32) + ; GCN-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GCN-NEXT: [[READANYLANE4:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV12]] + ; GCN-NEXT: [[READANYLANE5:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV13]] + ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE4]](s32), [[READANYLANE5]](s32) + ; GCN-NEXT: [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GCN-NEXT: [[READANYLANE6:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV14]] + ; GCN-NEXT: [[READANYLANE7:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV15]] + ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE6]](s32), [[READANYLANE7]](s32) + ; GCN-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64) + ; GCN-NEXT: [[READANYLANE8:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV16]] + ; GCN-NEXT: [[READANYLANE9:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV17]] + ; GCN-NEXT: [[MV4:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE8]](s32), [[READANYLANE9]](s32) + ; GCN-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64) + ; GCN-NEXT: [[READANYLANE10:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV18]] + ; GCN-NEXT: [[READANYLANE11:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV19]] + ; GCN-NEXT: [[MV5:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE10]](s32), [[READANYLANE11]](s32) + ; GCN-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64) + ; GCN-NEXT: [[READANYLANE12:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV20]] + ; GCN-NEXT: [[READANYLANE13:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV21]] + ; GCN-NEXT: [[MV6:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE12]](s32), [[READANYLANE13]](s32) + ; GCN-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64) + ; GCN-NEXT: [[READANYLANE14:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV22]] + ; GCN-NEXT: [[READANYLANE15:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV23]] + ; GCN-NEXT: [[MV7:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE14]](s32), [[READANYLANE15]](s32) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>) from %ir.global.not.uniform.v8i64) ... 
@@ -207,6 +290,7 @@ body: | --- name: load_global_v8i32_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -223,6 +307,7 @@ body: | --- name: load_global_v4i64_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -239,6 +324,7 @@ body: | --- name: load_global_v16i32_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -255,6 +341,7 @@ body: | --- name: load_global_v8i64_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -271,6 +358,7 @@ body: | --- name: load_constant_v8i32_non_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -280,11 +368,21 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY1]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32 + 16, basealign 32, addrspace 4) ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV]] + ; GCN-NEXT: [[READANYLANE1:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV1]] + ; GCN-NEXT: [[READANYLANE2:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV2]] + ; GCN-NEXT: [[READANYLANE3:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV3]] + ; GCN-NEXT: [[READANYLANE4:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV4]] + ; GCN-NEXT: [[READANYLANE5:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV5]] + ; GCN-NEXT: [[READANYLANE6:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV6]] + ; GCN-NEXT: [[READANYLANE7:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV7]] + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[READANYLANE]](s32), [[READANYLANE1]](s32), [[READANYLANE2]](s32), [[READANYLANE3]](s32), [[READANYLANE4]](s32), [[READANYLANE5]](s32), [[READANYLANE6]](s32), [[READANYLANE7]](s32) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>) from %ir.constant.not.uniform.v8i32) ... 
@@ -292,6 +390,7 @@ body: | --- name: load_constant_i256_non_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -301,11 +400,21 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY1]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s128) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) ; GCN-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[LOAD]](s128), [[LOAD1]](s128) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s256) + ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV]] + ; GCN-NEXT: [[READANYLANE1:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV1]] + ; GCN-NEXT: [[READANYLANE2:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV2]] + ; GCN-NEXT: [[READANYLANE3:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV3]] + ; GCN-NEXT: [[READANYLANE4:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV4]] + ; GCN-NEXT: [[READANYLANE5:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV5]] + ; GCN-NEXT: [[READANYLANE6:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV6]] + ; GCN-NEXT: [[READANYLANE7:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV7]] + ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(s256) = G_MERGE_VALUES [[READANYLANE]](s32), [[READANYLANE1]](s32), [[READANYLANE2]](s32), [[READANYLANE3]](s32), [[READANYLANE4]](s32), [[READANYLANE5]](s32), [[READANYLANE6]](s32), [[READANYLANE7]](s32) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s256) = G_LOAD %0 :: (load (s256) from %ir.constant.not.uniform) ... 
@@ -313,6 +422,7 @@ body: | --- name: load_constant_v16i16_non_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -323,11 +433,21 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform, align 32, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY1]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform, align 32, addrspace 4) ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[LOAD1]](<8 x s16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>), [[UV2:%[0-9]+]]:vgpr(<2 x s16>), [[UV3:%[0-9]+]]:vgpr(<2 x s16>), [[UV4:%[0-9]+]]:vgpr(<2 x s16>), [[UV5:%[0-9]+]]:vgpr(<2 x s16>), [[UV6:%[0-9]+]]:vgpr(<2 x s16>), [[UV7:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) + ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(<2 x s16>) = G_READANYLANE [[UV]] + ; GCN-NEXT: [[READANYLANE1:%[0-9]+]]:sgpr(<2 x s16>) = G_READANYLANE [[UV1]] + ; GCN-NEXT: [[READANYLANE2:%[0-9]+]]:sgpr(<2 x s16>) = G_READANYLANE [[UV2]] + ; GCN-NEXT: [[READANYLANE3:%[0-9]+]]:sgpr(<2 x s16>) = G_READANYLANE [[UV3]] + ; GCN-NEXT: [[READANYLANE4:%[0-9]+]]:sgpr(<2 x s16>) = G_READANYLANE [[UV4]] + ; GCN-NEXT: [[READANYLANE5:%[0-9]+]]:sgpr(<2 x s16>) = G_READANYLANE [[UV5]] + ; GCN-NEXT: [[READANYLANE6:%[0-9]+]]:sgpr(<2 x s16>) = G_READANYLANE [[UV6]] + ; GCN-NEXT: [[READANYLANE7:%[0-9]+]]:sgpr(<2 x s16>) = G_READANYLANE [[UV7]] + ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:sgpr(<16 x s16>) = G_CONCAT_VECTORS [[READANYLANE]](<2 x s16>), [[READANYLANE1]](<2 x s16>), [[READANYLANE2]](<2 x s16>), [[READANYLANE3]](<2 x s16>), [[READANYLANE4]](<2 x s16>), [[READANYLANE5]](<2 x s16>), [[READANYLANE6]](<2 x s16>), [[READANYLANE7]](<2 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s16>) = G_LOAD %0 :: (load (<16 x s16>) from %ir.constant.not.uniform) ... 
@@ -335,6 +455,7 @@ body: | --- name: load_constant_v4i64_non_uniform legalized: true +tracksRegLiveness: true body: | bb.0: @@ -344,11 +465,29 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY1]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64 + 16, basealign 32, addrspace 4) ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) + ; GCN-NEXT: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV4]] + ; GCN-NEXT: [[READANYLANE1:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV5]] + ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE]](s32), [[READANYLANE1]](s32) + ; GCN-NEXT: [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GCN-NEXT: [[READANYLANE2:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV6]] + ; GCN-NEXT: [[READANYLANE3:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV7]] + ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE2]](s32), [[READANYLANE3]](s32) + ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GCN-NEXT: [[READANYLANE4:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV8]] + ; GCN-NEXT: [[READANYLANE5:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV9]] + ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE4]](s32), [[READANYLANE5]](s32) + ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GCN-NEXT: [[READANYLANE6:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV10]] + ; GCN-NEXT: [[READANYLANE7:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV11]] + ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE6]](s32), [[READANYLANE7]](s32) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>) from %ir.constant.not.uniform.v4i64) ... 
@@ -356,6 +495,7 @@ body: |
 ---
 name: load_constant_v16i32_non_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -365,17 +505,35 @@ body: |
     ; GCN-NEXT: {{ $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
-    ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4)
+    ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY1]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4)
     ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
-    ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64)
     ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 16, basealign 64, addrspace 4)
     ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32
-    ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C1]](s64)
     ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 4)
     ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
-    ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C2]](s64)
     ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 48, basealign 64, addrspace 4)
     ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+    ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>)
+    ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV]]
+    ; GCN-NEXT: [[READANYLANE1:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV1]]
+    ; GCN-NEXT: [[READANYLANE2:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV2]]
+    ; GCN-NEXT: [[READANYLANE3:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV3]]
+    ; GCN-NEXT: [[READANYLANE4:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV4]]
+    ; GCN-NEXT: [[READANYLANE5:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV5]]
+    ; GCN-NEXT: [[READANYLANE6:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV6]]
+    ; GCN-NEXT: [[READANYLANE7:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV7]]
+    ; GCN-NEXT: [[READANYLANE8:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV8]]
+    ; GCN-NEXT: [[READANYLANE9:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV9]]
+    ; GCN-NEXT: [[READANYLANE10:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV10]]
+    ; GCN-NEXT: [[READANYLANE11:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV11]]
+    ; GCN-NEXT: [[READANYLANE12:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV12]]
+    ; GCN-NEXT: [[READANYLANE13:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV13]]
+    ; GCN-NEXT: [[READANYLANE14:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV14]]
+    ; GCN-NEXT: [[READANYLANE15:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV15]]
+    ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<16 x s32>) = G_BUILD_VECTOR [[READANYLANE]](s32), [[READANYLANE1]](s32), [[READANYLANE2]](s32), [[READANYLANE3]](s32), [[READANYLANE4]](s32), [[READANYLANE5]](s32), [[READANYLANE6]](s32), [[READANYLANE7]](s32), [[READANYLANE8]](s32), [[READANYLANE9]](s32), [[READANYLANE10]](s32), [[READANYLANE11]](s32), [[READANYLANE12]](s32), [[READANYLANE13]](s32), [[READANYLANE14]](s32), [[READANYLANE15]](s32)
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>) from %ir.constant.not.uniform.v16i32)
 ...
@@ -383,6 +541,7 @@ body: |
 ---
 name: load_constant_v8i64_non_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -392,17 +551,51 @@ body: |
     ; GCN-NEXT: {{ $}}
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
-    ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4)
+    ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY1]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4)
     ; GCN-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
-    ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C]](s64)
     ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 16, basealign 64, addrspace 4)
     ; GCN-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32
-    ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C1]](s64)
     ; GCN-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 4)
     ; GCN-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
-    ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY1]], [[C2]](s64)
     ; GCN-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 48, basealign 64, addrspace 4)
     ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>)
+    ; GCN-NEXT: [[UV:%[0-9]+]]:vgpr(s64), [[UV1:%[0-9]+]]:vgpr(s64), [[UV2:%[0-9]+]]:vgpr(s64), [[UV3:%[0-9]+]]:vgpr(s64), [[UV4:%[0-9]+]]:vgpr(s64), [[UV5:%[0-9]+]]:vgpr(s64), [[UV6:%[0-9]+]]:vgpr(s64), [[UV7:%[0-9]+]]:vgpr(s64) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>)
+    ; GCN-NEXT: [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](s64)
+    ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV8]]
+    ; GCN-NEXT: [[READANYLANE1:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV9]]
+    ; GCN-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE]](s32), [[READANYLANE1]](s32)
+    ; GCN-NEXT: [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+    ; GCN-NEXT: [[READANYLANE2:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV10]]
+    ; GCN-NEXT: [[READANYLANE3:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV11]]
+    ; GCN-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE2]](s32), [[READANYLANE3]](s32)
+    ; GCN-NEXT: [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+    ; GCN-NEXT: [[READANYLANE4:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV12]]
+    ; GCN-NEXT: [[READANYLANE5:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV13]]
+    ; GCN-NEXT: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE4]](s32), [[READANYLANE5]](s32)
+    ; GCN-NEXT: [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+    ; GCN-NEXT: [[READANYLANE6:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV14]]
+    ; GCN-NEXT: [[READANYLANE7:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV15]]
+    ; GCN-NEXT: [[MV3:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE6]](s32), [[READANYLANE7]](s32)
+    ; GCN-NEXT: [[UV16:%[0-9]+]]:vgpr(s32), [[UV17:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV4]](s64)
+    ; GCN-NEXT: [[READANYLANE8:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV16]]
+    ; GCN-NEXT: [[READANYLANE9:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV17]]
+    ; GCN-NEXT: [[MV4:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE8]](s32), [[READANYLANE9]](s32)
+    ; GCN-NEXT: [[UV18:%[0-9]+]]:vgpr(s32), [[UV19:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV5]](s64)
+    ; GCN-NEXT: [[READANYLANE10:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV18]]
+    ; GCN-NEXT: [[READANYLANE11:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV19]]
+    ; GCN-NEXT: [[MV5:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE10]](s32), [[READANYLANE11]](s32)
+    ; GCN-NEXT: [[UV20:%[0-9]+]]:vgpr(s32), [[UV21:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV6]](s64)
+    ; GCN-NEXT: [[READANYLANE12:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV20]]
+    ; GCN-NEXT: [[READANYLANE13:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV21]]
+    ; GCN-NEXT: [[MV6:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE12]](s32), [[READANYLANE13]](s32)
+    ; GCN-NEXT: [[UV22:%[0-9]+]]:vgpr(s32), [[UV23:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV7]](s64)
+    ; GCN-NEXT: [[READANYLANE14:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV22]]
+    ; GCN-NEXT: [[READANYLANE15:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[UV23]]
+    ; GCN-NEXT: [[MV7:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[READANYLANE14]](s32), [[READANYLANE15]](s32)
+    ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>) from %ir.constant.not.uniform.v8i64)
 ...
@@ -410,6 +603,7 @@ body: |
 ---
 name: load_constant_v8i32_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -426,6 +620,7 @@ body: |
 ---
 name: load_constant_v16i16_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -442,6 +637,7 @@ body: |
 ---
 name: load_constant_v4i64_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -458,6 +654,7 @@ body: |
 ---
 name: load_constant_v16i32_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -474,6 +671,7 @@ body: |
 ---
 name: load_constant_v8i64_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -490,6 +688,7 @@ body: |
 ---
 name: load_local_uniform
 legalized: true
+tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $sgpr0
@@ -500,6 +699,7 @@ body: |
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
     ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 3)
+    ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[LOAD]]
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 3)
 ...
@@ -507,6 +707,7 @@ body: |
 ---
 name: load_region_uniform
 legalized: true
+tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $sgpr0
@@ -525,6 +726,7 @@ body: |
 ---
 name: extload_constant_i8_to_i32_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -535,6 +737,7 @@ body: |
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 4)
+    ; GFX7-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[LOAD]]
     ;
     ; GFX12-LABEL: name: extload_constant_i8_to_i32_uniform
     ; GFX12: liveins: $sgpr0_sgpr1
@@ -548,6 +751,7 @@ body: |
 ---
 name: extload_global_i8_to_i32_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -559,6 +763,7 @@ body: |
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
     ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 1)
+    ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[LOAD]]
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 1, align 1)
 ...
@@ -566,6 +771,7 @@ body: |
 ---
 name: extload_constant_i16_to_i32_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -577,6 +783,7 @@ body: |
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 4)
+    ; GFX7-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[LOAD]]
     ;
     ; GFX12-LABEL: name: extload_constant_i16_to_i32_uniform
     ; GFX12: liveins: $sgpr0_sgpr1
@@ -590,6 +797,7 @@ body: |
 ---
 name: extload_global_i16_to_i32_uniform
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -601,6 +809,7 @@ body: |
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
     ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 1)
+    ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[LOAD]]
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 1, align 2)
 ...
@@ -608,6 +817,7 @@ body: |
 ---
 name: load_constant_i32_uniform_align4
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -624,6 +834,7 @@ body: |
 ---
 name: load_constant_i32_uniform_align2
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -635,6 +846,7 @@ body: |
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
     ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 2, addrspace 4)
+    ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[LOAD]]
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 2)
 ...
@@ -642,6 +854,7 @@ body: |
 ---
 name: load_constant_i32_uniform_align1
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -653,6 +866,7 @@ body: |
     ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
     ; GCN-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 1, addrspace 4)
+    ; GCN-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[LOAD]]
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 1)
 ...
@@ -660,6 +874,7 @@ body: |
 ---
 name: load_private_uniform_sgpr_i32
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -706,10 +921,10 @@ body: |
     ; GCN-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi
     ; GCN: bb.0:
     ; GCN-NEXT: successors: %bb.1(0x80000000)
-    ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; GCN-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
     ; GCN-NEXT: {{ $}}
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
-    ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr(p4) = COPY $sgpr2_sgpr3
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY $vgpr0_vgpr1
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY $vgpr2_vgpr3
     ; GCN-NEXT: G_BR %bb.1
     ; GCN-NEXT: {{ $}}
     ; GCN-NEXT: bb.1:
@@ -721,14 +936,14 @@ body: |
     ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PHI]], [[C]](s64)
     ; GCN-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4)
     ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
-    ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr(p4) = COPY [[COPY1]](p4)
+    ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr(p4) = COPY [[COPY1]](p4)
     ; GCN-NEXT: G_BR %bb.1
   bb.0:
-    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

-    %0:_(p4) = COPY $sgpr0_sgpr1
-    %1:_(p4) = COPY $sgpr2_sgpr3
+    %0:_(p4) = COPY $vgpr0_vgpr1
+    %1:_(p4) = COPY $vgpr2_vgpr3
     G_BR %bb.1

   bb.1:
@@ -741,6 +956,7 @@ body: |
 ---
 name: load_constant_v3i32_align4
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -771,6 +987,7 @@ body: |
 ---
 name: load_constant_v3i32_align8
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -801,6 +1018,7 @@ body: |
 ---
 name: load_constant_v3i32_align16
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -828,6 +1046,7 @@ body: |
 ---
 name: load_constant_v6i16_align4
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -840,10 +1059,9 @@ body: |
     ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
     ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, addrspace 4)
-    ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; GFX7-NEXT: [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD1]](<2 x s16>)
-    ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16)
-    ; GFX7-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>)
+    ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[LOAD1]](<2 x s16>)
+    ; GFX7-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX12-LABEL: name: load_constant_v6i16_align4
     ; GFX12: liveins: $sgpr0_sgpr1
@@ -859,6 +1077,7 @@ body: |
 ---
 name: load_constant_v6i16_align8
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -871,10 +1090,9 @@ body: |
     ; GFX7-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
     ; GFX7-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; GFX7-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, align 8, addrspace 4)
-    ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
-    ; GFX7-NEXT: [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD1]](<2 x s16>)
-    ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16)
-    ; GFX7-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>)
+    ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
+    ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[LOAD1]](<2 x s16>)
+    ; GFX7-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX12-LABEL: name: load_constant_v6i16_align8
     ; GFX12: liveins: $sgpr0_sgpr1
@@ -890,6 +1108,7 @@ body: |
 ---
 name: load_constant_v6i16_align16
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -899,9 +1118,9 @@ body: |
     ; GFX7-NEXT: {{ $}}
     ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<8 x s16>), addrspace 4)
-    ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16), [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16), [[UV6:%[0-9]+]]:sgpr(s16), [[UV7:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>)
-    ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16)
-    ; GFX7-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>)
+    ; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(<2 x s16>), [[UV1:%[0-9]+]]:sgpr(<2 x s16>), [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>)
+    ; GFX7-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>)
+    ; GFX7-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
     ;
     ; GFX12-LABEL: name: load_constant_v6i16_align16
     ; GFX12: liveins: $sgpr0_sgpr1
@@ -917,6 +1136,7 @@ body: |
 ---
 name: load_constant_i96_align4
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -947,6 +1167,7 @@ body: |
 ---
 name: load_constant_i96_align8
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
@@ -977,6 +1198,7 @@ body: |
 ---
 name: load_constant_i96_align16
 legalized: true
+tracksRegLiveness: true

 body: |
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir
index 821b78f8810b6f..a680391258ab07 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -verify-machineinstrs -o - %s | FileCheck %s

 ---
 name: zextload_constant_i8_to_i32_uniform
@@ -15,6 +14,7 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
     ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 4)
+    ; CHECK-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[ZEXTLOAD]]
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 4, align 1)
 ...
@@ -33,6 +33,7 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
     ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s8), addrspace 1)
+    ; CHECK-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[ZEXTLOAD]]
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 1, align 1)
 ...
@@ -51,6 +52,7 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
     ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 4)
+    ; CHECK-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[ZEXTLOAD]]
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 4, align 2)
 ...
@@ -69,6 +71,7 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4)
     ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p4) :: (load (s16), addrspace 1)
+    ; CHECK-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[ZEXTLOAD]]
     %0:_(p4) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 1, align 2)
 ...
@@ -86,6 +89,7 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
     ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (s8), addrspace 3)
+    ; CHECK-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[ZEXTLOAD]]
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), addrspace 3, align 1)
 ...
@@ -104,6 +108,7 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
     ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:vgpr(s32) = G_ZEXTLOAD [[COPY1]](p3) :: (load (s16), addrspace 3)
+    ; CHECK-NEXT: [[READANYLANE:%[0-9]+]]:sgpr(s32) = G_READANYLANE [[ZEXTLOAD]]
     %0:_(p3) = COPY $sgpr0
     %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), addrspace 3, align 2)
 ...