This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new f4c3a1e5a IMPALA-11459: Use new LLVM Pass Manager
f4c3a1e5a is described below
commit f4c3a1e5a3783706be034fddbd7264ca86eb40d7
Author: Michael Smith <[email protected]>
AuthorDate: Mon Jun 5 16:25:30 2023 -0700
IMPALA-11459: Use new LLVM Pass Manager
LLVM developed a new pass manager -
https://llvm.org/docs/NewPassManager.html - to overcome some of the
limitations of LegacyPassManager. It offers improved optimization
performance by reusing analysis across all types and levels of
optimization passes. It also appears to be better maintained in future
releases of LLVM.
Switches to using the new PassManager via PassBuilder and a
ModulePassManager. Breaks out PruneModule into a separate
FunctionPruneTime timer to more easily track any regressions there.
Change-Id: I947a5b067da50c18f62c3f9af9876463e542f58a
Reviewed-on: http://gerrit.cloudera.org:8080/20014
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-by: Michael Smith <[email protected]>
---
be/src/codegen/llvm-codegen.cc | 89 +++++++++++++++++-------------------------
be/src/codegen/llvm-codegen.h | 5 ++-
be/src/service/fe-support.cc | 3 +-
be/src/service/impalad-main.cc | 2 +-
cmake_modules/FindLlvm.cmake | 2 +-
5 files changed, 43 insertions(+), 58 deletions(-)
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index 5a0d608ff..8bc449602 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -26,10 +26,6 @@
#include <boost/assert/source_location.hpp>
#include <gutil/strings/substitute.h>
-#include <llvm/ADT/Triple.h>
-#include <llvm/Analysis/InstructionSimplify.h>
-#include <llvm/Analysis/Passes.h>
-#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/Bitcode/BitcodeReader.h>
#include <llvm/Bitcode/BitcodeWriter.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
@@ -41,18 +37,17 @@
#include <llvm/IR/Function.h>
#include <llvm/IR/GlobalVariable.h>
#include <llvm/IR/InstIterator.h>
-#include <llvm/IR/LegacyPassManager.h>
-#include <llvm/IR/NoFolder.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Linker/Linker.h>
+#include <llvm/Passes/PassBuilder.h>
+#include <llvm/Support/CommandLine.h>
#include <llvm/Support/DynamicLibrary.h>
#include <llvm/Support/ErrorHandling.h>
#include <llvm/Support/Host.h>
-#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_ostream.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/IPO/PassManagerBuilder.h>
+#include <llvm/Transforms/IPO/GlobalDCE.h>
+#include <llvm/Transforms/IPO/Internalize.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Transforms/Utils/Cloning.h>
@@ -155,8 +150,11 @@ const map<int64_t, std::string>
LlvmCodeGen::cpu_flag_mappings_{
LOG(FATAL) << "LLVM hit fatal error: " << reason.c_str();
}
-Status LlvmCodeGen::InitializeLlvm(bool load_backend) {
+Status LlvmCodeGen::InitializeLlvm(const char* procname, bool load_backend) {
DCHECK(!llvm_initialized_);
+ // Treat all functions as having the inline hint
+ std::array<const char*, 2> argv = { { procname, "-inline-threshold=325" } };
+ CHECK(llvm::cl::ParseCommandLineOptions(argv.size(), argv.data()));
llvm::remove_fatal_error_handler();
llvm::install_fatal_error_handler(LlvmCodegenHandleError);
// These functions can *only* be called once per process and are used to set
up
@@ -234,6 +232,7 @@ LlvmCodeGen::LlvmCodeGen(FragmentState* state, ObjectPool*
pool,
module_bitcode_gen_timer_ = ADD_TIMER(profile_, "ModuleBitcodeGenTime");
module_bitcode_size_ = ADD_COUNTER(profile_, "ModuleBitcodeSize",
TUnit::BYTES);
ir_generation_timer_ = ADD_TIMER(profile_, "IrGenerationTime");
+ function_prune_timer_ = ADD_TIMER(profile_, "FunctionPruneTime");
optimization_timer_ = ADD_TIMER(profile_, "OptimizationTime");
compile_timer_ = ADD_TIMER(profile_, "CompileTime");
main_thread_timer_ = ADD_TIMER(profile_, "MainThreadCodegenTime");
@@ -1239,10 +1238,7 @@ Status LlvmCodeGen::StoreCache(CodeGenCacheKey&
cache_key) {
}
void LlvmCodeGen::PruneModule() {
- SCOPED_TIMER(optimization_timer_);
- llvm::TargetIRAnalysis target_analysis =
- execution_engine_->getTargetMachine()->getTargetIRAnalysis();
-
+ SCOPED_TIMER(function_prune_timer_);
// Before running any other optimization passes, run the internalize pass,
giving it
// the names of all functions registered by AddFunctionToJit(), followed by
the
// global dead code elimination pass. This causes all functions not
registered to be
@@ -1252,15 +1248,18 @@ void LlvmCodeGen::PruneModule() {
for (auto& entry : fns_to_jit_compile_) {
exported_fn_names.insert(entry.first->getName().str());
}
- unique_ptr<llvm::legacy::PassManager> module_pass_manager(
- new llvm::legacy::PassManager());
-
module_pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis));
- module_pass_manager->add(
- llvm::createInternalizePass([&exported_fn_names](const
llvm::GlobalValue& gv) {
+
+ llvm::ModuleAnalysisManager module_analysis_manager;
+ llvm::PassBuilder pass_builder(execution_engine_->getTargetMachine());
+ pass_builder.registerModuleAnalyses(module_analysis_manager);
+
+ llvm::ModulePassManager module_pass_manager;
+ module_pass_manager.addPass(
+ llvm::InternalizePass([&exported_fn_names](const llvm::GlobalValue& gv) {
return exported_fn_names.find(gv.getName().str()) !=
exported_fn_names.end();
}));
- module_pass_manager->add(llvm::createGlobalDCEPass());
- module_pass_manager->run(*module_);
+ module_pass_manager.addPass(llvm::GlobalDCEPass());
+ module_pass_manager.run(*module_, module_analysis_manager);
}
Status LlvmCodeGen::FinalizeModule() {
@@ -1404,28 +1403,25 @@ Status
LlvmCodeGen::FinalizeModuleAsync(RuntimeProfile::EventSequence* event_seq
Status LlvmCodeGen::OptimizeModule() {
SCOPED_TIMER(optimization_timer_);
+ llvm::LoopAnalysisManager LAM;
+ llvm::FunctionAnalysisManager FAM;
+ llvm::CGSCCAnalysisManager CGAM;
+ llvm::ModuleAnalysisManager MAM;
+
// This pass manager will construct optimizations passes that are "typical"
for
// c/c++ programs. We're relying on llvm to pick the best passes for us.
// TODO: we can likely muck with this to get better compile speeds or write
// our own passes. Our subexpression elimination optimization can be rolled
into
// a pass.
- llvm::PassManagerBuilder pass_builder;
- // 2 maps to -O2
- // TODO: should we switch to 3? (3 may not produce different IR than 2 while
taking
- // longer, but we should check)
- pass_builder.OptLevel = 2;
- // Don't optimize for code size (this corresponds to -O2/-O3)
- pass_builder.SizeLevel = 0;
- // Use a threshold equivalent to adding InlineHint on all functions.
- // This results in slightly better performance than the default threshold
(225).
- pass_builder.Inliner = llvm::createFunctionInliningPass(325);
-
- // The TargetIRAnalysis pass is required to provide information about the
target
- // machine to optimisation passes, e.g. the cost model.
- llvm::TargetIRAnalysis target_analysis =
- execution_engine_->getTargetMachine()->getTargetIRAnalysis();
- unique_ptr<llvm::legacy::PassManager> module_pass_manager(
- new llvm::legacy::PassManager());
+ llvm::PassBuilder pass_builder(execution_engine_->getTargetMachine());
+ pass_builder.registerModuleAnalyses(MAM);
+ pass_builder.registerCGSCCAnalyses(CGAM);
+ pass_builder.registerFunctionAnalyses(FAM);
+ pass_builder.registerLoopAnalyses(LAM);
+ pass_builder.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+ llvm::ModulePassManager pass_manager =
pass_builder.buildPerModuleDefaultPipeline(
+ llvm::PassBuilder::OptimizationLevel::O2);
// Update counters before final optimization, but after removing unused
functions. This
// gives us a rough measure of how much work the optimization and
compilation must do.
@@ -1442,23 +1438,8 @@ Status LlvmCodeGen::OptimizeModule() {
return mem_tracker_->MemLimitExceeded(NULL, msg, estimated_memory);
}
- // Create and run function pass manager
- unique_ptr<llvm::legacy::FunctionPassManager> fn_pass_manager(
- new llvm::legacy::FunctionPassManager(module_));
-
fn_pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis));
- pass_builder.populateFunctionPassManager(*fn_pass_manager);
- fn_pass_manager->doInitialization();
- for (llvm::Module::iterator it = module_->begin(), end = module_->end(); it
!= end;
- ++it) {
- if (!it->isDeclaration()) fn_pass_manager->run(*it);
- }
- fn_pass_manager->doFinalization();
-
// Create and run module pass manager
- module_pass_manager.reset(new llvm::legacy::PassManager());
-
module_pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis));
- pass_builder.populateModulePassManager(*module_pass_manager);
- module_pass_manager->run(*module_);
+ pass_manager.run(*module_, MAM);
if (FLAGS_print_llvm_ir_instruction_count) {
for (int i = 0; i < fns_to_jit_compile_.size(); ++i) {
InstructionCounter counter;
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index af93e3f1d..9175ddc05 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -167,7 +167,7 @@ class LlvmCodeGen {
/// the current object and not be able to find the backend symbols
/// TODO: this can probably be removed after impalad refactor where the java
/// side is not loading the be explicitly anymore.
- static Status InitializeLlvm(bool load_backend = false);
+ static Status InitializeLlvm(const char* procname = "main", bool
load_backend = false);
/// Creates a codegen instance for Impala initialized with the
cross-compiled Impala IR.
/// 'codegen' will contain the created object on success.
@@ -843,6 +843,9 @@ class LlvmCodeGen {
/// FragmentInstanceState during its 'CODEGEN_START' state.
RuntimeProfile::Counter* ir_generation_timer_;
+ /// Time spent pruning unused functions.
+ RuntimeProfile::Counter* function_prune_timer_;
+
/// Time spent optimizing the module.
RuntimeProfile::Counter* optimization_timer_;
diff --git a/be/src/service/fe-support.cc b/be/src/service/fe-support.cc
index c0eb3f083..4cb359455 100644
--- a/be/src/service/fe-support.cc
+++ b/be/src/service/fe-support.cc
@@ -78,7 +78,8 @@ Java_org_apache_impala_service_FeSupport_NativeFeInit(
// exceptions to the FE.
InitCommonRuntime(1, &name, true,
external_fe ? TestInfo::NON_TEST : TestInfo::FE_TEST, external_fe);
- THROW_IF_ERROR(LlvmCodeGen::InitializeLlvm(true), env,
JniUtil::internal_exc_class());
+ THROW_IF_ERROR(
+ LlvmCodeGen::InitializeLlvm(name, true), env,
JniUtil::internal_exc_class());
ExecEnv* exec_env = new ExecEnv(external_fe); // This also caches it from
the process.
THROW_IF_ERROR(exec_env->InitForFeSupport(), env,
JniUtil::internal_exc_class());
}
diff --git a/be/src/service/impalad-main.cc b/be/src/service/impalad-main.cc
index df51e8570..0390714de 100644
--- a/be/src/service/impalad-main.cc
+++ b/be/src/service/impalad-main.cc
@@ -59,7 +59,7 @@ DECLARE_bool(is_coordinator);
int ImpaladMain(int argc, char** argv) {
InitCommonRuntime(argc, argv, true);
- ABORT_IF_ERROR(LlvmCodeGen::InitializeLlvm());
+ ABORT_IF_ERROR(LlvmCodeGen::InitializeLlvm(argv[0]));
JniUtil::InitLibhdfs();
ABORT_IF_ERROR(HBaseTableScanner::Init());
ABORT_IF_ERROR(HBaseTable::InitJNI());
diff --git a/cmake_modules/FindLlvm.cmake b/cmake_modules/FindLlvm.cmake
index 6db74442d..e1c833b0e 100644
--- a/cmake_modules/FindLlvm.cmake
+++ b/cmake_modules/FindLlvm.cmake
@@ -85,7 +85,7 @@ execute_process(
# Get the link libs we need. llvm has many and we don't want to link all of
the libs
# if we don't need them.
execute_process(
- COMMAND ${LLVM_CONFIG_EXECUTABLE} --libnames core mcjit native ipo bitreader
target linker analysis debuginfodwarf
+ COMMAND ${LLVM_CONFIG_EXECUTABLE} --libnames core mcjit native ipo bitreader
target linker analysis debuginfodwarf passes
OUTPUT_VARIABLE LLVM_MODULE_LIBS
OUTPUT_STRIP_TRAILING_WHITESPACE
)