https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/109939
>From b337b06e2ecd3d6bbf740ee9ec857463f32d0f1c Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Tue, 24 Sep 2024 11:41:18 +0000 Subject: [PATCH 1/2] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM --- llvm/lib/Target/AMDGPU/AMDGPU.h | 6 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 ++- .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 60 ++++++++++++------- llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 25 ++++++++ .../AMDGPU/si-pre-allocate-wwm-regs.mir | 21 +++++++ 6 files changed, 93 insertions(+), 27 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 342d55e828bca5..95d0ad0f9dc96a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass(); FunctionPass *createLowerWWMCopiesPass(); FunctionPass *createSIMemoryLegalizerPass(); FunctionPass *createSIInsertWaitcntsPass(); -FunctionPass *createSIPreAllocateWWMRegsPass(); +FunctionPass *createSIPreAllocateWWMRegsLegacyPass(); FunctionPass *createSIFormMemoryClausesPass(); FunctionPass *createSIPostRABundlerPass(); @@ -212,8 +212,8 @@ extern char &SILateBranchLoweringPassID; void initializeSIOptimizeExecMaskingPass(PassRegistry &); extern char &SIOptimizeExecMaskingID; -void initializeSIPreAllocateWWMRegsPass(PassRegistry &); -extern char &SIPreAllocateWWMRegsID; +void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &); +extern char &SIPreAllocateWWMRegsLegacyID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 0ebf34c901c142..174a90f0aa419d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -102,5 +102,6 @@ MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass()) MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass()) MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass()) +MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass()) MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass()) #undef MACHINE_FUNCTION_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 23ee0c3e896eb3..f367b5fbea45af 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -41,6 +41,7 @@ #include "SIMachineFunctionInfo.h" #include "SIMachineScheduler.h" #include "SIPeepholeSDWA.h" +#include "SIPreAllocateWWMRegs.h" #include "SIShrinkInstructions.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" @@ -508,7 +509,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSILateBranchLoweringPass(*PR); initializeSIMemoryLegalizerPass(*PR); initializeSIOptimizeExecMaskingPass(*PR); - initializeSIPreAllocateWWMRegsPass(*PR); + initializeSIPreAllocateWWMRegsLegacyPass(*PR); initializeSIFormMemoryClausesPass(*PR); initializeSIPostRABundlerPass(*PR); initializeGCNCreateVOPDPass(*PR); @@ -1506,7 +1507,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() { addPass(&SILowerSGPRSpillsLegacyID); // To Allocate wwm registers used in whole quad mode operations (for shaders). - addPass(&SIPreAllocateWWMRegsID); + addPass(&SIPreAllocateWWMRegsLegacyID); // For allocating other wwm register operands. addPass(createWWMRegAllocPass(false)); @@ -1543,7 +1544,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() { addPass(&SILowerSGPRSpillsLegacyID); // To Allocate wwm registers used in whole quad mode operations (for shaders). - addPass(&SIPreAllocateWWMRegsID); + addPass(&SIPreAllocateWWMRegsLegacyID); // For allocating other whole wave mode registers. addPass(createWWMRegAllocPass(true)); diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index 07303e2aa726c5..f9109c01c8085b 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "SIPreAllocateWWMRegs.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -34,7 +35,7 @@ static cl::opt<bool> namespace { -class SIPreAllocateWWMRegs : public MachineFunctionPass { +class SIPreAllocateWWMRegs { private: const SIInstrInfo *TII; const SIRegisterInfo *TRI; @@ -48,13 +49,21 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass { #ifndef NDEBUG void printWWMInfo(const MachineInstr &MI); #endif + bool processDef(MachineOperand &MO); + void rewriteRegs(MachineFunction &MF); + +public: + SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix, + VirtRegMap *VRM) + : LIS(LIS), Matrix(Matrix), VRM(VRM) {} + bool run(MachineFunction &MF); +}; +class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass { public: static char ID; - SIPreAllocateWWMRegs() : MachineFunctionPass(ID) { - initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry()); - } + SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -65,28 +74,24 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - -private: - bool processDef(MachineOperand &MO); - void rewriteRegs(MachineFunction &MF); }; } // End anonymous namespace. -INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE, - "SI Pre-allocate WWM Registers", false, false) +INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE, + "SI Pre-allocate WWM Registers", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy) -INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE, - "SI Pre-allocate WWM Registers", false, false) +INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE, + "SI Pre-allocate WWM Registers", false, false) -char SIPreAllocateWWMRegs::ID = 0; +char SIPreAllocateWWMRegsLegacy::ID = 0; -char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID; +char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID; -FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { - return new SIPreAllocateWWMRegs(); +FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() { + return new SIPreAllocateWWMRegsLegacy(); } bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { @@ -184,7 +189,14 @@ SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) { #endif -bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { +bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) { + auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); + auto *Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM(); + auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM(); + return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF); +} + +bool SIPreAllocateWWMRegs::run(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n"); const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); @@ -193,10 +205,6 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); - LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); - Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM(); - VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM(); - RegClassInfo.runOnMachineFunction(MF); bool PreallocateSGPRSpillVGPRs = @@ -254,3 +262,13 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { rewriteRegs(MF); return true; } + +PreservedAnalyses +SIPreAllocateWWMRegsPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF); + auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(MF); + auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF); + SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h new file mode 100644 index 00000000000000..a0acde3afa77ce --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h @@ -0,0 +1,25 @@ +//===--- SIPreAllocateWWMRegs.h -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H +#define LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class SIPreAllocateWWMRegsPass + : public PassInfoMixin<SIPreAllocateWWMRegsPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir index 2ca275cf950564..8d9da95384c0a8 100644 --- a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir @@ -2,7 +2,11 @@ # RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - %s | FileCheck %s # RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - %s | FileCheck %s --check-prefix=CHECK2 +# RUN: llc -mtriple=amdgcn -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -passes=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s --check-prefix=CHECK2 + # COM: auto-generated updates might remove checks for MachineFunctionInfo reserved registers. + --- name: pre_allocate_wwm_regs_strict @@ -21,6 +25,16 @@ body: | ; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec ; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5 ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] + ; + ; CHECK2-LABEL: name: pre_allocate_wwm_regs_strict + ; CHECK2: liveins: $sgpr1 + ; CHECK2-NEXT: {{ $}} + ; CHECK2-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK2-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK2-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK2-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec + ; CHECK2-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5 + ; CHECK2-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] %0:vgpr_32 = IMPLICIT_DEF renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -35,6 +49,13 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr1 + ; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr + ; CHECK: liveins: $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[SI_SPILL_S32_TO_VGPR:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]] + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] + ; ; CHECK2-LABEL: name: pre_allocate_wwm_spill_to_vgpr ; CHECK2: wwmReservedRegs: ; CHECK2-NEXT: - '$vgpr0' >From 127ae2652242f4270052d89143d411e9fe506ef4 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Mon, 7 Oct 2024 09:23:15 +0000 Subject: [PATCH 2/2] C++ mode --- llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h index a0acde3afa77ce..99648176491687 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h @@ -1,4 +1,4 @@ -//===--- SIPreAllocateWWMRegs.h -------------------------------------------===// +//===--- SIPreAllocateWWMRegs.h ---------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits