--- src/gallium/drivers/radeon/radeon_llvm_util.c | 35 -------------- .../state_trackers/clover/core/compiler.hpp | 3 +- src/gallium/state_trackers/clover/core/device.cpp | 6 +++ src/gallium/state_trackers/clover/core/device.hpp | 1 + src/gallium/state_trackers/clover/core/program.cpp | 3 +- .../state_trackers/clover/llvm/invocation.cpp | 55 +++++++++++++++++----- 6 files changed, 55 insertions(+), 48 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c index 2ace91f..fe7f9a6 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_util.c +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c @@ -53,40 +53,6 @@ unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx, return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels"); } -static void radeon_llvm_optimize(LLVMModuleRef mod) -{ - const char *data_layout = LLVMGetDataLayout(mod); - LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout); - LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate(); - LLVMPassManagerRef pass_manager = LLVMCreatePassManager(); - - /* Functions calls are not supported yet, so we need to inline - * everything. The most efficient way to do this is to add - * the always_inline attribute to all non-kernel functions - * and then run the Always Inline pass. The Always Inline - * pass will automaically inline functions with this attribute - * and does not perform the expensive cost analysis that the normal - * inliner does. - */ - - LLVMValueRef fn; - for (fn = LLVMGetFirstFunction(mod); fn; fn = LLVMGetNextFunction(fn)) { - /* All the non-kernel functions have internal linkage */ - if (LLVMGetLinkage(fn) == LLVMInternalLinkage) { - LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute); - } - } - - LLVMAddTargetData(TD, pass_manager); - LLVMAddAlwaysInlinerPass(pass_manager); - LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager); - - LLVMRunPassManager(pass_manager, mod); - LLVMPassManagerBuilderDispose(builder); - LLVMDisposePassManager(pass_manager); - LLVMDisposeTargetData(TD); -} - LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, const unsigned char *bitcode, unsigned bitcode_len) { @@ -109,6 +75,5 @@ LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, LLVMDeleteFunction(kernel_function); } FREE(kernel_metadata); - radeon_llvm_optimize(mod); return mod; } diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp index 49cd022..5035a6b 100644 --- a/src/gallium/state_trackers/clover/core/compiler.hpp +++ b/src/gallium/state_trackers/clover/core/compiler.hpp @@ -32,7 +32,8 @@ namespace clover { module compile_program_llvm(const compat::string &source, pipe_shader_ir ir, const compat::string &target, - const compat::string &opts); + const compat::string &opts, + bool subroutines_supported); module compile_program_tgsi(const compat::string &source); } diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index 2c5f9b7..6820f56 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -187,3 +187,9 @@ enum pipe_endian device::endianness() const { return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS); } + +bool +device::subroutines_supported() const { + return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_SUBROUTINES); +} diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 433ac81..b187a93 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -68,6 +68,7 @@ namespace clover { enum pipe_shader_ir ir_format() const; std::string ir_target() const; enum pipe_endian endianness() const; + bool subroutines_supported() const; friend class command_queue; friend class root_resource; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index 3aaa652..b547023 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -56,7 +56,8 @@ program::build(const ref_vector<device> &devs, const char *opts) { auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ? compile_program_tgsi(_source) : compile_program_llvm(_source, dev.ir_format(), - dev.ir_target(), build_opts(dev))); + dev.ir_target(), build_opts(dev), + dev.subroutines_supported())); _binaries.insert({ &dev, module }); } catch (build_error &e) { diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index cdf32b6..c46e85e 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -30,6 +30,7 @@ #include <llvm/Bitcode/BitstreamWriter.h> #include <llvm/Bitcode/ReaderWriter.h> #include <llvm/Linker.h> +#include <llvm/Target/TargetLibraryInfo.h> #if HAVE_LLVM < 0x0303 #include <llvm/DerivedTypes.h> #include <llvm/LLVMContext.h> @@ -42,6 +43,7 @@ #include <llvm/IRReader/IRReader.h> #endif #include <llvm/PassManager.h> +#include <llvm/Support/CodeGen.h> #include <llvm/Support/TargetSelect.h> #include <llvm/Support/MemoryBuffer.h> #if HAVE_LLVM < 0x0303 @@ -212,6 +214,9 @@ namespace { // that is no executed by all threads) during its optimizaton passes. c.getCodeGenOpts().LinkBitcodeFile = libclc_path; + // Compile at -O0. We will do optimizations later. + c.getCodeGenOpts().OptimizationLevel = llvm::CodeGenOpt::None; + // Compile the code if (!c.ExecuteAction(act)) throw build_error(log); @@ -241,10 +246,39 @@ namespace { } void - internalize_functions(llvm::Module *mod, - const std::vector<llvm::Function *> &kernels) { + optimize(llvm::Module *mod, const std::vector<llvm::Function *> &kernels, + bool subroutines_supported) { + llvm::PassManagerBuilder builder; llvm::PassManager PM; + std::vector<const char*> export_list; + +#if HAVE_LLVM < 0x0305 + PM.add(new llvm::DataLayout(mod)); +#else + PM.add(new llvm::DataLayoutPass(mod)); +#endif + // For targets that don't support subroutines, we need to inline + // everything. The most efficient way to do this is to add + // the always_inline attribute to all non-kernel functions + // and then run the Always Inline pass. The Always Inline + // pass will automaically inline functions with this attribute + // and does not perform the expensive cost analysis that the normal + // inliner does. + for (llvm::Module::iterator f = mod->begin(), e = mod->end(); + f != e; ++f) { + if (std::find(kernels.begin(), kernels.end(), f) != kernels.end()) { + export_list.push_back(f->getName().data()); + } else if (!subroutines_supported) { + f->addFnAttr(llvm::Attribute::AlwaysInline); + } + } + + if (!subroutines_supported) + builder.Inliner = llvm::createAlwaysInlinerPass(); + else + builder.Inliner = llvm::createFunctionInliningPass(); + // Add a function internalizer pass. // // By default, the function internalizer pass will look for a function @@ -259,14 +293,12 @@ namespace { // list of kernel functions to the internalizer. The internalizer will // treat the functions in the list as "main" functions and internalize // all of the other functions. - std::vector<const char*> export_list; - for (std::vector<llvm::Function *>::const_iterator I = kernels.begin(), - E = kernels.end(); - I != E; ++I) { - llvm::Function *kernel = *I; - export_list.push_back(kernel->getName().data()); - } PM.add(llvm::createInternalizePass(export_list)); + + builder.LibraryInfo = + new llvm::TargetLibraryInfo(llvm::Triple(mod->getTargetTriple())); + + builder.populateModulePassManager(PM); PM.run(*mod); } @@ -372,7 +404,8 @@ module clover::compile_program_llvm(const compat::string &source, enum pipe_shader_ir ir, const compat::string &target, - const compat::string &opts) { + const compat::string &opts, + bool subroutines_supported) { std::vector<llvm::Function *> kernels; size_t processor_str_len = std::string(target.begin()).find_first_of("-"); @@ -388,7 +421,7 @@ clover::compile_program_llvm(const compat::string &source, find_kernels(mod, kernels); - internalize_functions(mod, kernels); + optimize(mod, kernels, subroutines_supported); // Build the clover::module switch (ir) { -- 1.8.1.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev