https://github.com/Akshat-Oke updated https://github.com/llvm/llvm-project/pull/109939
>From 646d2d1a54ca0ac3bc312f4038826fb431890bf6 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Tue, 24 Sep 2024 11:41:18 +0000 Subject: [PATCH] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM --- .../llvm/Passes/MachinePassRegistry.def | 4 +- llvm/lib/Target/AMDGPU/AMDGPU.h | 6 +- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 6 +- .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 60 ++++++++++++------- llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 30 ++++++++++ .../AMDGPU/si-pre-allocate-wwm-regs.mir | 26 ++++++++ .../si-pre-allocate-wwm-sgpr-spills.mir | 21 +++++++ 7 files changed, 124 insertions(+), 29 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index bdc56ca03f392a..72e2cf232bfd17 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -96,6 +96,7 @@ LOOP_PASS("loop-term-fold", LoopTermFoldPass()) // computed. (We still either need to regenerate kill flags after regalloc, or // preferably fix the scavenger to not depend on them). MACHINE_FUNCTION_ANALYSIS("live-intervals", LiveIntervalsAnalysis()) +MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-block-freq", MachineBlockFrequencyAnalysis()) MACHINE_FUNCTION_ANALYSIS("machine-branch-prob", @@ -122,8 +123,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis()) // MachineRegionInfoPassAnalysis()) // MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", // MachineTraceMetricsAnalysis()) MACHINE_FUNCTION_ANALYSIS("reaching-def", -// ReachingDefAnalysisAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", -// LiveRegMatrixAnalysis()) MACHINE_FUNCTION_ANALYSIS("gc-analysis", +// ReachingDefAnalysisAnalysis()) MACHINE_FUNCTION_ANALYSIS("gc-analysis", // GCMachineCodeAnalysisPass()) #undef MACHINE_FUNCTION_ANALYSIS diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index b2dd354e496a2e..c0fd5e4625895a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass(); FunctionPass *createLowerWWMCopiesPass(); FunctionPass *createSIMemoryLegalizerPass(); FunctionPass *createSIInsertWaitcntsPass(); -FunctionPass *createSIPreAllocateWWMRegsPass(); +FunctionPass *createSIPreAllocateWWMRegsLegacyPass(); FunctionPass *createSIFormMemoryClausesPass(); FunctionPass *createSIPostRABundlerPass(); @@ -208,8 +208,8 @@ extern char &SILateBranchLoweringPassID; void initializeSIOptimizeExecMaskingPass(PassRegistry &); extern char &SIOptimizeExecMaskingID; -void initializeSIPreAllocateWWMRegsPass(PassRegistry &); -extern char &SIPreAllocateWWMRegsID; +void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &); +extern char &SIPreAllocateWWMRegsLegacyID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 04fdee0819b502..a39293863d1c54 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -461,7 +461,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSILateBranchLoweringPass(*PR); initializeSIMemoryLegalizerPass(*PR); initializeSIOptimizeExecMaskingPass(*PR); - initializeSIPreAllocateWWMRegsPass(*PR); + initializeSIPreAllocateWWMRegsLegacyPass(*PR); initializeSIFormMemoryClausesPass(*PR); initializeSIPostRABundlerPass(*PR); initializeGCNCreateVOPDPass(*PR); @@ -1443,7 +1443,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() { // Equivalent of PEI for SGPRs. addPass(&SILowerSGPRSpillsLegacyID); - addPass(&SIPreAllocateWWMRegsID); + addPass(&SIPreAllocateWWMRegsLegacyID); addPass(createVGPRAllocPass(false)); @@ -1467,7 +1467,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() { // Equivalent of PEI for SGPRs. addPass(&SILowerSGPRSpillsLegacyID); - addPass(&SIPreAllocateWWMRegsID); + addPass(&SIPreAllocateWWMRegsLegacyID); addPass(createVGPRAllocPass(true)); diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index 0635cab7b872e2..c1d7a464a81537 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "SIPreAllocateWWMRegs.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -34,7 +35,7 @@ static cl::opt<bool> namespace { -class SIPreAllocateWWMRegs : public MachineFunctionPass { +class SIPreAllocateWWMRegs { private: const SIInstrInfo *TII; const SIRegisterInfo *TRI; @@ -48,13 +49,21 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass { #ifndef NDEBUG void printWWMInfo(const MachineInstr &MI); #endif + bool processDef(MachineOperand &MO); + void rewriteRegs(MachineFunction &MF); + +public: + SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix, + VirtRegMap *VRM) + : LIS(LIS), Matrix(Matrix), VRM(VRM) {} + bool run(MachineFunction &MF); +}; +class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass { public: static char ID; - SIPreAllocateWWMRegs() : MachineFunctionPass(ID) { - initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry()); - } + SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -65,28 +74,24 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - -private: - bool processDef(MachineOperand &MO); - void rewriteRegs(MachineFunction &MF); }; } // End anonymous namespace. -INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE, - "SI Pre-allocate WWM Registers", false, false) +INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE, + "SI Pre-allocate WWM Registers", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperPass) -INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE, - "SI Pre-allocate WWM Registers", false, false) +INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE, + "SI Pre-allocate WWM Registers", false, false) -char SIPreAllocateWWMRegs::ID = 0; +char SIPreAllocateWWMRegsLegacy::ID = 0; -char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID; +char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID; -FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { - return new SIPreAllocateWWMRegs(); +FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() { + return new SIPreAllocateWWMRegsLegacy(); } bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { @@ -184,7 +189,14 @@ SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) { #endif -bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { +bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) { + auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); + auto *Matrix = &getAnalysis<LiveRegMatrixWrapperPass>().getLRM(); + auto *VRM = &getAnalysis<VirtRegMapWrapperPass>().getVRM(); + return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF); +} + +bool SIPreAllocateWWMRegs::run(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n"); const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); @@ -193,10 +205,6 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { TRI = &TII->getRegisterInfo(); MRI = &MF.getRegInfo(); - LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); - Matrix = &getAnalysis<LiveRegMatrixWrapperPass>().getLRM(); - VRM = &getAnalysis<VirtRegMapWrapperPass>().getVRM(); - RegClassInfo.runOnMachineFunction(MF); bool PreallocateSGPRSpillVGPRs = @@ -254,3 +262,13 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { rewriteRegs(MF); return true; } + +PreservedAnalyses +SIPreAllocateWWMRegsPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF); + auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(MF); + auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(MF); + SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h new file mode 100644 index 00000000000000..b86f7fe9213af0 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h @@ -0,0 +1,30 @@ +//===--- SIPreAllocateWWMRegs.h -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H +#define LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class SIPreAllocateWWMRegsPass + : public PassInfoMixin<SIPreAllocateWWMRegsPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getRequiredProperties() { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_SIPREALLOCATEWWMREGS_H diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir new file mode 100644 index 00000000000000..f2db299f575f5e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s + +--- + +name: pre_allocate_wwm_regs_strict +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr1 + ; CHECK-LABEL: name: pre_allocate_wwm_regs_strict + ; CHECK: liveins: $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, implicit $exec + ; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5 + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] + %0:vgpr_32 = IMPLICIT_DEF + renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec + %24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %25:vgpr_32 = V_MOV_B32_dpp %24:vgpr_32(tied-def 0), %0:vgpr_32, 323, 12, 15, 0, implicit $exec + $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5 + %2:vgpr_32 = COPY %0:vgpr_32 +... diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir new file mode 100644 index 00000000000000..bb42900f3cf52a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir @@ -0,0 +1,21 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s | FileCheck %s +# This +--- + +name: pre_allocate_wwm_spill_to_vgpr +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr1 + ; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr + ; CHECK: liveins: $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]] + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] + %0:vgpr_32 = IMPLICIT_DEF + %23:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, %0:vgpr_32 + %2:vgpr_32 = COPY %0:vgpr_32 +... + _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits