https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/120557
>From e968c0891474c9145a32bdb51465588bd9bab3ea Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Wed, 11 Dec 2024 10:57:21 +0000 Subject: [PATCH 1/4] [RegAlloc][NewPM] Plug Greedy RA in codegen pipeline --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 18 +++++++++++++++- .../llvm/Passes/MachinePassRegistry.def | 4 ++-- .../include/llvm/Target/CGPassBuilderOption.h | 2 +- llvm/lib/Passes/PassBuilder.cpp | 13 ++++++++++++ ...plicit-def-remat-requires-impdef-check.mir | 1 + ...implicit-def-with-impdef-greedy-assert.mir | 1 + llvm/test/CodeGen/AArch64/pr51516.mir | 1 + llvm/test/CodeGen/AArch64/spill-fold.mir | 2 ++ .../extend-phi-subrange-not-in-parent.mir | 1 + llvm/test/CodeGen/MIR/Generic/runPass.mir | 1 + .../SystemZ/clear-liverange-spillreg.mir | 1 + llvm/test/CodeGen/Thumb/high-reg-clobber.mir | 1 + llvm/test/CodeGen/X86/limit-split-cost.mir | 1 + .../test/tools/llc/new-pm/regalloc-amdgpu.mir | 17 +++++++++------ llvm/tools/llc/NewPMDriver.cpp | 21 +++++++++++++++---- 15 files changed, 71 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index aca9b3b888acc3..971217923f7ef1 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -1056,7 +1056,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization( /// /// A target that uses the standard regalloc pass order for fast or optimized /// allocation may still override this for per-target regalloc -/// selection. But -regalloc=... always takes precedence. +/// selection. But -regalloc-npm=... always takes precedence. template <typename Derived, typename TargetMachineT> void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator( AddMachinePass &addPass, bool Optimized) const { @@ -1073,6 +1073,22 @@ template <typename Derived, typename TargetMachineT> void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass( AddMachinePass &addPass, bool Optimized) const { // TODO: Parse Opt.RegAlloc to add register allocator. + // Use the specified -regalloc-npm={basic|greedy|fast|pbqp} + if (Opt.RegAlloc > RegAllocType::Default) { + switch (Opt.RegAlloc) { + case RegAllocType::Fast: + addPass(RegAllocFastPass()); + break; + case RegAllocType::Greedy: + addPass(RAGreedyPass()); + break; + default: + llvm_unreachable("Register allocator not supported yet."); + } + return; + } + // -regalloc=default or unspecified, so pick based on the optimization level. + derived().addTargetRegisterAllocator(addPass, Optimized); } template <typename Derived, typename TargetMachineT> diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index fa7f769f31fdde..1c89fb0eb8dbb1 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -188,12 +188,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( }, "filter=reg-filter;no-clear-vregs") +// 'all' is the default filter MACHINE_FUNCTION_PASS_WITH_PARAMS( "greedy", "RAGreedyPass", [](RAGreedyPass::Options Opts) { return RAGreedyPass(Opts); }, [PB = this](StringRef Params) { - // TODO: parseRegAllocGreedyFilterFunc(*PB, Params); - return Expected<RAGreedyPass::Options>(RAGreedyPass::Options{}); + return parseRegAllocGreedyFilterFunc(*PB, Params); }, "reg-filter" ) #undef MACHINE_FUNCTION_PASS_WITH_PARAMS diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h index d3d19c8a7dc9f2..c7c1572bcde603 100644 --- a/llvm/include/llvm/Target/CGPassBuilderOption.h +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -52,7 +52,7 @@ struct CGPassBuilderOption { bool RequiresCodeGenSCCOrder = false; RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault; - StringRef RegAlloc = "default"; + RegAllocType RegAlloc = RegAllocType::Default; std::optional<GlobalISelAbortMode> EnableGlobalISelAbort; std::string FSProfileFile; std::string FSRemappingFile; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 3a9db2dbd59226..769d3b0a20f964 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1316,6 +1316,19 @@ parseBoundsCheckingOptions(StringRef Params) { return Options; } +Expected<RAGreedyPass::Options> parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) { + if (Params.empty() || Params == "all") { + return RAGreedyPass::Options(); + } + std::optional<RegAllocFilterFunc> Filter = PB.parseRegAllocFilter(Params); + if (!Filter) { + return make_error<StringError>( + formatv("invalid regallocgreedy register filter '{0}' ", Params).str(), + inconvertibleErrorCode()); + } + return RAGreedyPass::Options{*Filter, Params}; +} + } // namespace /// Tests whether a pass name starts with a valid prefix for a default pipeline diff --git a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir index 47aa34e3c01156..a168c2891c7d6f 100644 --- a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir +++ b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 # RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s +# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s --- | define void @inst_stores_to_dead_spill_implicit_def_impdef() { diff --git a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir index a5d74ef75f0a0a..d9edda47638a3f 100644 --- a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir +++ b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 # RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s +# RUN: llc -mtriple=arm64-apple-ios -passes=regallocgreedy -o - %s | FileCheck %s --- name: widget diff --git a/llvm/test/CodeGen/AArch64/pr51516.mir b/llvm/test/CodeGen/AArch64/pr51516.mir index 910bfb858b50f6..e84f0ca2015ce5 100644 --- a/llvm/test/CodeGen/AArch64/pr51516.mir +++ b/llvm/test/CodeGen/AArch64/pr51516.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-unknown-fuchsia -passes=regallocgreedy -verify-machineinstrs -o - %s | FileCheck %s # Check that we spill %31 and do not rematerialize it since the use operand # of ADDXri is killed by the STRXui in this block. diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir index b1e7ebe3a7e82b..2773b5f19618a9 100644 --- a/llvm/test/CodeGen/AArch64/spill-fold.mir +++ b/llvm/test/CodeGen/AArch64/spill-fold.mir @@ -1,5 +1,7 @@ # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64_be-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s --- | define i64 @test_subreg_spill_fold() { ret i64 0 } define i64 @test_subreg_spill_fold2() { ret i64 0 } diff --git a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir index 760ae6032230f5..42bba4d1504013 100644 --- a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir +++ b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s # Initially %2 starts out with 2 subranges (one for sub0, and one for # the rest of the lanes). After %2 is split, after refineSubRanges the diff --git a/llvm/test/CodeGen/MIR/Generic/runPass.mir b/llvm/test/CodeGen/MIR/Generic/runPass.mir index 75763c5389b09e..41dd98ff909b0c 100644 --- a/llvm/test/CodeGen/MIR/Generic/runPass.mir +++ b/llvm/test/CodeGen/MIR/Generic/runPass.mir @@ -2,6 +2,7 @@ # RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s # RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s # RUN: llc -passes=regallocfast -o - %s | FileCheck %s +# RUN: llc -passes=regallocgreedy -o - %s | FileCheck %s # Check that passes are initialized correctly, so that it's possible to # use -run-pass. diff --git a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir index 197c3d8551fc38..de0db97f14bf3c 100644 --- a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir +++ b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir @@ -1,4 +1,5 @@ #RUN: llc -o - %s -mtriple=s390x-ibm-linux -run-pass=greedy +#RUN: llc -o - %s -mtriple=s390x-ibm-linux -passes=regallocgreedy #PR34502. Check HoistSpill works properly after the live range of spilled #virtual register is cleared. --- | diff --git a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir index 1402c7c2cbca36..e085e38ae5fe31 100644 --- a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir +++ b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s # RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST # RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST +# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocgreedy %s -o - | FileCheck %s ... --- diff --git a/llvm/test/CodeGen/X86/limit-split-cost.mir b/llvm/test/CodeGen/X86/limit-split-cost.mir index 7ec0404e0f737c..eec8a3939151a6 100644 --- a/llvm/test/CodeGen/X86/limit-split-cost.mir +++ b/llvm/test/CodeGen/X86/limit-split-cost.mir @@ -1,5 +1,6 @@ # REQUIRES: asserts # RUN: llc -mtriple=x86_64-- -run-pass=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s +# RUN: llc -mtriple=x86_64-- -passes=regallocgreedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s # Check no global region split is needed because the live range to split is trivially rematerializable. # CHECK-NOT: Compact region bundles --- | diff --git a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir index 07f2d350ffd9c0..66c9d8942f3da4 100644 --- a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir +++ b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir @@ -1,12 +1,17 @@ # REQUIRES: amdgpu-registered-target -# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS -# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER +# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAFAST +# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAFAST-BAD-FILTER -# PASS: regallocfast<filter=sgpr> -# PASS: regallocfast<filter=wwm> -# PASS: regallocfast<filter=vgpr> -# BAD-FILTER: invalid regallocfast register filter 'bad-filter' +# RUN: llc -mtriple=amdgcn -passes='regallocgreedy<sgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAGREEDY +# RUN: not llc -mtriple=amdgcn -passes='regallocgreedy<bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAGREEDY-BAD-FILTER +# RAFAST: regallocfast<filter=sgpr> +# RAFAST: regallocfast<filter=wwm> +# RAFAST: regallocfast<filter=vgpr> +# RAFAST-BAD-FILTER: invalid regallocfast register filter 'bad-filter' + +# RAGREEDY: regallocgreedy<sgpr> +# RAGREEDY-BAD-FILTER: invalid regallocgreedy register filter 'bad-filter' --- name: f ... diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp index 3892fbb8c74f78..06b7fa7ac351b0 100644 --- a/llvm/tools/llc/NewPMDriver.cpp +++ b/llvm/tools/llc/NewPMDriver.cpp @@ -48,10 +48,23 @@ using namespace llvm; -static cl::opt<std::string> - RegAlloc("regalloc-npm", - cl::desc("Register allocator to use for new pass manager"), - cl::Hidden, cl::init("default")); +// static cl::opt<std::string> +// RegAlloc("regalloc-npm", +// cl::desc("Register allocator to use for new pass manager"), +// cl::Hidden, cl::init("default")); + +// create option for RegAllocType enum +static cl::opt<RegAllocType> RegAlloc( + "regalloc-npm", cl::desc("Register allocator to use for new pass manager"), + cl::Hidden, cl::init(RegAllocType::Default), + cl::values( + clEnumValN(RegAllocType::Default, "default", + "Default register allocator"), + clEnumValN(RegAllocType::PBQP, "pbqp", "PBQP register allocator"), + clEnumValN(RegAllocType::Fast, "fast", "Fast register allocator"), + clEnumValN(RegAllocType::Basic, "basic", "Basic register allocator"), + clEnumValN(RegAllocType::Greedy, "greedy", + "Greedy register allocator"))); static cl::opt<bool> DebugPM("debug-pass-manager", cl::Hidden, >From 44af111fd4983268dcc3ba3ae48cae8accc4ca1f Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Thu, 19 Dec 2024 11:00:46 +0000 Subject: [PATCH 2/4] Error out on AMDGPU for regalloc-npm flag --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 29 ++++++++++--------- .../include/llvm/Target/CGPassBuilderOption.h | 4 +-- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 23 +++++++++++++++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 2 ++ .../CodeGen/AMDGPU/sgpr-regalloc-flags.ll | 3 ++ llvm/tools/llc/NewPMDriver.cpp | 2 +- 6 files changed, 47 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 971217923f7ef1..b0083aa6e58afa 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -1083,7 +1083,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass( addPass(RAGreedyPass()); break; default: - llvm_unreachable("Register allocator not supported yet."); + report_fatal_error("Register allocator not supported yet.", false); } return; } @@ -1159,20 +1159,23 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc( // PreRA instruction scheduling. addPass(MachineSchedulerPass()); - if (derived().addRegAssignmentOptimized(addPass)) { - // Allow targets to expand pseudo instructions depending on the choice of - // registers before MachineCopyPropagation. - derived().addPostRewrite(addPass); + if (auto E = derived().addRegAssignmentOptimized(addPass)) { + // addRegAssignmentOptimized did not add a reg alloc pass, so do nothing. + // FIXME: This is not really an error. + return; + } + // Allow targets to expand pseudo instructions depending on the choice of + // registers before MachineCopyPropagation. + derived().addPostRewrite(addPass); - // Copy propagate to forward register uses and try to eliminate COPYs that - // were not coalesced. - addPass(MachineCopyPropagationPass()); + // Copy propagate to forward register uses and try to eliminate COPYs that + // were not coalesced. + addPass(MachineCopyPropagationPass()); - // Run post-ra machine LICM to hoist reloads / remats. - // - // FIXME: can this move into MachineLateOptimization? - addPass(MachineLICMPass()); - } + // Run post-ra machine LICM to hoist reloads / remats. + // + // FIXME: can this move into MachineLateOptimization? + addPass(MachineLICMPass()); } //===---------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h index c7c1572bcde603..b496a9f66296f2 100644 --- a/llvm/include/llvm/Target/CGPassBuilderOption.h +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -20,7 +20,7 @@ namespace llvm { enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline }; -enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP }; +enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP }; // Not one-on-one but mostly corresponding to commandline options in // TargetPassConfig.cpp. @@ -52,7 +52,7 @@ struct CGPassBuilderOption { bool RequiresCodeGenSCCOrder = false; RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault; - RegAllocType RegAlloc = RegAllocType::Default; + RegAllocType RegAlloc = RegAllocType::Unset; std::optional<GlobalISelAbortMode> EnableGlobalISelAbort; std::string FSProfileFile; std::string FSRemappingFile; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 7256eec89008a5..952bf479827f48 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -68,6 +68,7 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" @@ -2099,6 +2100,28 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization( addPass(SIShrinkInstructionsPass()); } +static const char RegAllocNPMNotSupportedMessage[] = + "-regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, " + "-wwm-regalloc-npm, and -vgpr-regalloc-npm"; + +Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized( + AddMachinePass &addPass) const { + if (Opt.RegAlloc != RegAllocType::Unset) + report_fatal_error(RegAllocNPMNotSupportedMessage, false); + + return make_error<StringError>("not implemented yet", + inconvertibleErrorCode()); +} + +Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast( + AddMachinePass &addPass) const { + if (Opt.RegAlloc != RegAllocType::Unset) + report_fatal_error(RegAllocNPMNotSupportedMessage, false); + + return make_error<StringError>("not implemented yet", + inconvertibleErrorCode()); +} + bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt, CodeGenOptLevel Level) const { if (Opt.getNumOccurrences()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 5ba58a92621edb..197476a0f80574 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -176,6 +176,8 @@ class AMDGPUCodeGenPassBuilder void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const; Error addInstSelector(AddMachinePass &) const; void addMachineSSAOptimization(AddMachinePass &) const; + Error addRegAssignmentOptimized(AddMachinePass &) const; + Error addRegAssignmentFast(AddMachinePass &) const; /// Check if a pass is enabled given \p Opt option. The option always /// overrides defaults if explicitly used. Otherwise its default will be used diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll index 52ad7e5355207d..a54fee3a0f964c 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll @@ -12,8 +12,11 @@ ; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s ; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=fast -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s +; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=fast -O0 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s +; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=basic -O3 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s ; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, and -vgpr-regalloc +; REGALLOC-NPM: -regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, -wwm-regalloc-npm, and -vgpr-regalloc-npm ; DEFAULT: Greedy Register Allocator ; DEFAULT-NEXT: Virtual Register Rewriter diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp index 06b7fa7ac351b0..de15b3717da699 100644 --- a/llvm/tools/llc/NewPMDriver.cpp +++ b/llvm/tools/llc/NewPMDriver.cpp @@ -56,7 +56,7 @@ using namespace llvm; // create option for RegAllocType enum static cl::opt<RegAllocType> RegAlloc( "regalloc-npm", cl::desc("Register allocator to use for new pass manager"), - cl::Hidden, cl::init(RegAllocType::Default), + cl::Hidden, cl::init(RegAllocType::Unset), cl::values( clEnumValN(RegAllocType::Default, "default", "Default register allocator"), >From 55236e506339104a688368e7f76a739e99014a0d Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Thu, 19 Dec 2024 14:24:53 +0000 Subject: [PATCH 3/4] Remove the TODO as it's implemented --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 - llvm/tools/llc/NewPMDriver.cpp | 6 ------ 2 files changed, 7 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index b0083aa6e58afa..91340a4fe9c35c 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -1072,7 +1072,6 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator( template <typename Derived, typename TargetMachineT> void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass( AddMachinePass &addPass, bool Optimized) const { - // TODO: Parse Opt.RegAlloc to add register allocator. // Use the specified -regalloc-npm={basic|greedy|fast|pbqp} if (Opt.RegAlloc > RegAllocType::Default) { switch (Opt.RegAlloc) { diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp index de15b3717da699..0f7aa6284962a2 100644 --- a/llvm/tools/llc/NewPMDriver.cpp +++ b/llvm/tools/llc/NewPMDriver.cpp @@ -48,12 +48,6 @@ using namespace llvm; -// static cl::opt<std::string> -// RegAlloc("regalloc-npm", -// cl::desc("Register allocator to use for new pass manager"), -// cl::Hidden, cl::init("default")); - -// create option for RegAllocType enum static cl::opt<RegAllocType> RegAlloc( "regalloc-npm", cl::desc("Register allocator to use for new pass manager"), cl::Hidden, cl::init(RegAllocType::Unset), >From 8f6b898879430c3f1d0129f0d7d8c0040561933d Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Thu, 19 Dec 2024 14:49:58 +0000 Subject: [PATCH 4/4] clang format files --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 16 ++++++++-------- llvm/lib/Passes/PassBuilder.cpp | 3 ++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 91340a4fe9c35c..c10cc121883595 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -1075,14 +1075,14 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass( // Use the specified -regalloc-npm={basic|greedy|fast|pbqp} if (Opt.RegAlloc > RegAllocType::Default) { switch (Opt.RegAlloc) { - case RegAllocType::Fast: - addPass(RegAllocFastPass()); - break; - case RegAllocType::Greedy: - addPass(RAGreedyPass()); - break; - default: - report_fatal_error("Register allocator not supported yet.", false); + case RegAllocType::Fast: + addPass(RegAllocFastPass()); + break; + case RegAllocType::Greedy: + addPass(RAGreedyPass()); + break; + default: + report_fatal_error("Register allocator not supported yet.", false); } return; } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 769d3b0a20f964..f0177cfb569f92 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1316,7 +1316,8 @@ parseBoundsCheckingOptions(StringRef Params) { return Options; } -Expected<RAGreedyPass::Options> parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) { +Expected<RAGreedyPass::Options> +parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) { if (Params.empty() || Params == "all") { return RAGreedyPass::Options(); } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits