https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/114547
>From c271a3252ad3bbcf7c251c68244be2be55a9d387 Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Fri, 1 Nov 2024 12:22:06 -0400 Subject: [PATCH 1/2] [PassBuilder] Replace `bool LTOPreLink` with `ThinOrFullLTOPhase Phase` This will allow more fine-grained control in the future. --- llvm/include/llvm/Passes/PassBuilder.h | 10 ++++++---- llvm/lib/Passes/PassBuilderPipelines.cpp | 24 +++++++++++++----------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index e6ced0cccb9b3c..0ebfdbb7865fdd 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -245,8 +245,9 @@ class PassBuilder { /// optimization and code generation without any link-time optimization. It /// typically correspond to frontend "-O[123]" options for optimization /// levels \c O1, \c O2 and \c O3 resp. - ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool LTOPreLink = false); + ModulePassManager buildPerModuleDefaultPipeline( + OptimizationLevel Level, + ThinOrFullLTOPhase Phase = ThinOrFullLTOPhase::None); /// Build a fat object default optimization pipeline. /// @@ -296,8 +297,9 @@ class PassBuilder { /// Build an O0 pipeline with the minimal semantically required passes. /// /// This should only be used for non-LTO and LTO pre-link pipelines. - ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, - bool LTOPreLink = false); + ModulePassManager + buildO0DefaultPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase Phase = ThinOrFullLTOPhase::None); /// Build the default `AAManager` with the default alias analysis pipeline /// registered. 
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 3f28dd39911f79..7c512ab15a6d38 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1601,9 +1601,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, ModulePassManager PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool LTOPreLink) { + ThinOrFullLTOPhase Phase) { if (Level == OptimizationLevel::O0) - return buildO0DefaultPipeline(Level, LTOPreLink); + return buildO0DefaultPipeline(Level, Phase); ModulePassManager MPM; @@ -1619,14 +1619,11 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // Apply module pipeline start EP callback. invokePipelineStartEPCallbacks(MPM, Level); - const ThinOrFullLTOPhase LTOPhase = LTOPreLink - ? ThinOrFullLTOPhase::FullLTOPreLink - : ThinOrFullLTOPhase::None; // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase)); + MPM.addPass(buildModuleSimplificationPipeline(Level, Phase)); // Now add the optimization pipeline. - MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase)); + MPM.addPass(buildModuleOptimizationPipeline(Level, Phase)); if (PGOOpt && PGOOpt->PseudoProbeForProfiling && PGOOpt->Action == PGOOptions::SampleUse) @@ -1635,6 +1632,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // Emit annotation remarks. 
addAnnotationRemarksPass(MPM); + bool LTOPreLink = Phase == ThinOrFullLTOPhase::FullLTOPreLink || + Phase == ThinOrFullLTOPhase::ThinLTOPreLink; if (LTOPreLink) addRequiredLTOPreLinkPasses(MPM); return MPM; @@ -1673,7 +1672,7 @@ PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, ModulePassManager PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { if (Level == OptimizationLevel::O0) - return buildO0DefaultPipeline(Level, /*LTOPreLink*/true); + return buildO0DefaultPipeline(Level, ThinOrFullLTOPhase::ThinLTOPreLink); ModulePassManager MPM; @@ -1794,7 +1793,7 @@ ModulePassManager PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) { // FIXME: We should use a customized pre-link pipeline! return buildPerModuleDefaultPipeline(Level, - /* LTOPreLink */ true); + ThinOrFullLTOPhase::FullLTOPreLink); } ModulePassManager @@ -2124,8 +2123,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, return MPM; } -ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, - bool LTOPreLink) { +ModulePassManager +PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { assert(Level == OptimizationLevel::O0 && "buildO0DefaultPipeline should only be used with O0"); @@ -2220,6 +2220,8 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, invokeOptimizerLastEPCallbacks(MPM, Level); + bool LTOPreLink = Phase == ThinOrFullLTOPhase::FullLTOPreLink || + Phase == ThinOrFullLTOPhase::ThinLTOPreLink; if (LTOPreLink) addRequiredLTOPreLinkPasses(MPM); >From c9be622e4f6f2c59bb3371ddd42fe6f4ff844c77 Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Fri, 1 Nov 2024 10:51:20 -0400 Subject: [PATCH 2/2] [PassBuilder] Add `LTOPreLink` to early simplification EP callbacks The early simplification pipeline is used in non-LTO and (Thin/Full)LTO pre-link stages. 
There are some passes that we want to run in non-LTO mode but not at the LTO pre-link stage. That control is currently missing; this PR adds it. To demonstrate its use, we enable the internalization pass for AMDGPU only in non-LTO mode, because running it in the pre-link stage causes some issues. --- clang/lib/CodeGen/BackendUtil.cpp | 3 ++- llvm/include/llvm/Passes/PassBuilder.h | 12 ++++++++---- llvm/lib/Passes/PassBuilderPipelines.cpp | 8 ++++---- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 19 +++++++++++++++---- llvm/lib/Target/BPF/BPFTargetMachine.cpp | 2 +- .../CodeGen/AMDGPU/print-pipeline-passes.ll | 8 ++++++++ llvm/tools/opt/NewPMDriver.cpp | 2 +- 7 files changed, 39 insertions(+), 15 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index ae33554a66b6b5..47a30f00612eb7 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -993,7 +993,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( createModuleToFunctionPassAdaptor(ObjCARCExpandPass())); }); PB.registerPipelineEarlySimplificationEPCallback( - [](ModulePassManager &MPM, OptimizationLevel Level) { + [](ModulePassManager &MPM, OptimizationLevel Level, + ThinOrFullLTOPhase) { if (Level != OptimizationLevel::O0) MPM.addPass(ObjCARCAPElimPass()); }); diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 0ebfdbb7865fdd..268df03615db23 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -480,7 +480,8 @@ class PassBuilder { /// This extension point allows adding optimization right after passes that do /// basic simplification of the input IR. 
void registerPipelineEarlySimplificationEPCallback( - const std::function<void(ModulePassManager &, OptimizationLevel)> &C) { + const std::function<void(ModulePassManager &, OptimizationLevel, + ThinOrFullLTOPhase)> &C) { PipelineEarlySimplificationEPCallbacks.push_back(C); } @@ -638,8 +639,9 @@ class PassBuilder { OptimizationLevel Level); void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level); - void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, - OptimizationLevel Level); + void invokePipelineEarlySimplificationEPCallbacks( + ModulePassManager &MPM, OptimizationLevel Level, + ThinOrFullLTOPhase Phase = ThinOrFullLTOPhase::None); static bool checkParametrizedPassName(StringRef Name, StringRef PassName) { if (!Name.consume_front(PassName)) @@ -764,7 +766,9 @@ class PassBuilder { FullLinkTimeOptimizationLastEPCallbacks; SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2> PipelineStartEPCallbacks; - SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2> + SmallVector<std::function<void(ModulePassManager &, OptimizationLevel, + ThinOrFullLTOPhase)>, + 2> PipelineEarlySimplificationEPCallbacks; SmallVector<std::function<void(ModuleAnalysisManager &)>, 2> diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 7c512ab15a6d38..bfb9678678f18a 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -384,9 +384,9 @@ void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM, C(MPM, Level); } void PassBuilder::invokePipelineEarlySimplificationEPCallbacks( - ModulePassManager &MPM, OptimizationLevel Level) { + ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) { for (auto &C : PipelineEarlySimplificationEPCallbacks) - C(MPM, Level); + C(MPM, Level, Phase); } // Helper to add AnnotationRemarksPass. 
@@ -1140,7 +1140,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::Assume)); - invokePipelineEarlySimplificationEPCallbacks(MPM, Level); + invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase); // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. @@ -2155,7 +2155,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, if (PGOOpt && PGOOpt->DebugInfoForProfiling) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); - invokePipelineEarlySimplificationEPCallbacks(MPM, Level); + invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase); // Build a minimal pipeline based on the semantics required by LLVM, // which is just that always inlining occurs. Further, disable generating diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index ad03017aae1c17..5492d0e589973a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -745,7 +745,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { }); PB.registerPipelineEarlySimplificationEPCallback( - [](ModulePassManager &PM, OptimizationLevel Level) { + [](ModulePassManager &PM, OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { PM.addPass(AMDGPUPrintfRuntimeBindingPass()); if (Level == OptimizationLevel::O0) @@ -753,7 +754,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PM.addPass(AMDGPUUnifyMetadataPass()); - if (InternalizeSymbols) { + // We don't want to run internalization at per-module stage. 
+ bool LTOPreLink = Phase == ThinOrFullLTOPhase::FullLTOPreLink || + Phase == ThinOrFullLTOPhase::ThinLTOPreLink; + if (InternalizeSymbols && !LTOPreLink) { PM.addPass(InternalizePass(mustPreserveGV)); PM.addPass(GlobalDCEPass()); } @@ -821,8 +825,15 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PM.addPass(AMDGPUSwLowerLDSPass(*this)); if (EnableLowerModuleLDS) PM.addPass(AMDGPULowerModuleLDSPass(*this)); - if (EnableAMDGPUAttributor && Level != OptimizationLevel::O0) - PM.addPass(AMDGPUAttributorPass(*this)); + if (Level != OptimizationLevel::O0) { + if (EnableAMDGPUAttributor) + PM.addPass(AMDGPUAttributorPass(*this)); + // Do we really need internalization in LTO? + if (InternalizeSymbols) { + PM.addPass(InternalizePass(mustPreserveGV)); + PM.addPass(GlobalDCEPass()); + } + } }); PB.registerRegClassFilterParsingCallback( diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index 7d91fa8bb824cf..578b2d607036b5 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -138,7 +138,7 @@ void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { FPM.addPass(BPFPreserveStaticOffsetPass(false)); }); PB.registerPipelineEarlySimplificationEPCallback( - [=](ModulePassManager &MPM, OptimizationLevel) { + [=](ModulePassManager &MPM, OptimizationLevel, ThinOrFullLTOPhase) { MPM.addPass(BPFAdjustOptPass()); }); } diff --git a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll index b9eda0c1cd3bb6..792e83095efaa7 100644 --- a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll +++ b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll @@ -3,9 +3,17 @@ ; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto<O2>" -print-pipeline-passes %s -o - | FileCheck %s ; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto<O3>" -print-pipeline-passes %s -o - | FileCheck %s +; RUN: opt -mtriple=amdgcn--amdhsa -S 
-passes="lto-pre-link<O0>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s +; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O1>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s +; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O2>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s +; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O3>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s + + ; CHECK: amdgpu-attributor ; O0-NOT: amdgpu-attributor +; PRE-NOT: internalize + define amdgpu_kernel void @kernel() { entry: ret void diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 9a477193a29365..3f1092433d9f31 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -294,7 +294,7 @@ static void registerEPCallbacks(PassBuilder &PB) { if (tryParsePipelineText<ModulePassManager>( PB, PipelineEarlySimplificationEPPipeline)) PB.registerPipelineEarlySimplificationEPCallback( - [&PB](ModulePassManager &PM, OptimizationLevel) { + [&PB](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) { ExitOnError Err("Unable to parse EarlySimplification pipeline: "); Err(PB.parsePassPipeline(PM, PipelineEarlySimplificationEPPipeline)); }); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits