On Wed, Jun 27, 2018 at 5:58 AM, Dave Airlie <airl...@gmail.com> wrote: > From: Dave Airlie <airl...@redhat.com> > > I'd like to encourage people to test this to see if it helps (like > does it make app startup better or less hitching in dxvk). > > The basic idea is to store a bunch of LLVM related data structs > in thread local storage so we can avoid reiniting them every time > we compile a shader. Since we know llvm objects aren't thread safe > it has to be stored using TLS to avoid any collisions. > > This should remove all the fixed overheads setup costs of creating > the pass manager each time. > > This takes a demo app time to compile the radv meta shaders on nocache > and exit from 1.7s to 1s. > > TODO: this doesn't work for radeonsi yet, but I'm not sure how TLS > works if you have radeonsi and radv loaded at the same time, if > they'll magically try and use the same tls stuff, in which case > this might explode all over the place. > > v2: fix llvm6 build, inline emit function, handle multiple targets > in one thread > v3: rebase and port onto new structure > --- > src/amd/common/ac_llvm_helper.cpp | 120 ++++++++++++++++++++++++++++-- > src/amd/common/ac_llvm_util.c | 10 +-- > src/amd/common/ac_llvm_util.h | 9 +++ > src/amd/vulkan/radv_debug.h | 1 + > src/amd/vulkan/radv_device.c | 1 + > src/amd/vulkan/radv_shader.c | 2 + > 6 files changed, 132 insertions(+), 11 deletions(-) > > diff --git a/src/amd/common/ac_llvm_helper.cpp > b/src/amd/common/ac_llvm_helper.cpp > index 27403dbe085..f1f1399b3fb 100644 > --- a/src/amd/common/ac_llvm_helper.cpp > +++ b/src/amd/common/ac_llvm_helper.cpp > @@ -31,12 +31,21 @@ > > #include "ac_llvm_util.h" > #include <llvm-c/Core.h> > -#include <llvm/Target/TargetOptions.h> > -#include <llvm/ExecutionEngine/ExecutionEngine.h> > -#include <llvm/IR/Attributes.h> > -#include <llvm/IR/CallSite.h> > +#include <llvm/Target/TargetMachine.h> > #include <llvm/IR/IRBuilder.h> > #include <llvm/Analysis/TargetLibraryInfo.h> > +#include 
<llvm/IR/LegacyPassManager.h> > + > +#include <llvm-c/Transforms/IPO.h> > +#include <llvm-c/Transforms/Scalar.h> > +#if HAVE_LLVM >= 0x0700 > +#include <llvm-c/Transforms/Utils.h> > +#endif > + > +#if HAVE_LLVM < 0x0700 > +#include "llvm/Support/raw_ostream.h" > +#endif > +#include <list> > > void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) > { > @@ -101,11 +110,110 @@ > ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info) > delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info); > } > > +class ac_llvm_per_thread_info { > +public: > + ac_llvm_per_thread_info(enum radeon_family arg_family, > + enum ac_target_machine_options arg_tm_options) > + : family(arg_family), tm_options(arg_tm_options), > + OStream(CodeString) {} > + ~ac_llvm_per_thread_info() { > + ac_llvm_compiler_dispose_internal(&llvm_info); > + } > + > + struct ac_llvm_compiler_info llvm_info; > + enum radeon_family family; > + enum ac_target_machine_options tm_options; > + llvm::SmallString<0> CodeString; > + llvm::raw_svector_ostream OStream;
Can we keep the naming style for variables consistent? Otherwise this series is Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Thanks! > + llvm::legacy::PassManager pass; > +}; > + > +/* we have to store a linked list per thread due to the possibility of > multiple gpus being required */ > +static thread_local std::list<ac_llvm_per_thread_info> > ac_llvm_per_thread_list; > + > bool ac_compile_to_memory_buffer(struct ac_llvm_compiler_info *info, > LLVMModuleRef M, > char **ErrorMessage, > LLVMMemoryBufferRef *OutMemBuf) > { > - return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, > LLVMObjectFile, > - ErrorMessage, OutMemBuf); > + ac_llvm_per_thread_info *thread_info = nullptr; > + if (info->thread_stored) { > + for (auto &I : ac_llvm_per_thread_list) { > + if (I.llvm_info.tm == info->tm) { > + thread_info = &I; > + break; > + } > + } > + > + if (!thread_info) { > + assert(0); > + return false; > + } > + } else { > + return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, > LLVMObjectFile, > + ErrorMessage, > OutMemBuf); > + } > + > + llvm::TargetMachine *TM = > reinterpret_cast<llvm::TargetMachine*>(thread_info->llvm_info.tm); > + llvm::Module *Mod = llvm::unwrap(M); > + llvm::StringRef Data; > + > + Mod->setDataLayout(TM->createDataLayout()); > + > + thread_info->pass.run(*Mod); > + > + Data = thread_info->OStream.str(); > + *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), > Data.size(), ""); > + thread_info->CodeString = ""; > + return false; > +} > + > +bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info, > + bool add_target_library_info, > + enum radeon_family family, > + enum ac_target_machine_options tm_options) > +{ > + if (tm_options & AC_TM_THREAD_LLVM) { > + for (auto &I : ac_llvm_per_thread_list) { > + if (I.family == family && > + I.tm_options == tm_options) { > + *info = I.llvm_info; > + return true; > + } > + } > + > + ac_llvm_per_thread_list.emplace_back(family, tm_options); > + ac_llvm_per_thread_info &tinfo 
= > ac_llvm_per_thread_list.back(); > + if (!ac_llvm_compiler_init_internal(&tinfo.llvm_info, > + true, > + family, > + tm_options)) > + return false; > + > + tinfo.llvm_info.thread_stored = true; > + *info = tinfo.llvm_info; > + > + llvm::TargetMachine *TM = > reinterpret_cast<llvm::TargetMachine*>(tinfo.llvm_info.tm); > + if (TM->addPassesToEmitFile(tinfo.pass, tinfo.OStream, > +#if HAVE_LLVM >= 0x0700 > + nullptr, > +#endif > + > llvm::TargetMachine::CGFT_ObjectFile)) { > + assert(0); > + return false; > + } > + } else { > + if (!ac_llvm_compiler_init_internal(info, > + add_target_library_info, > + family, > + tm_options)) > + return false; > + } > + return true; > +} > + > +void ac_llvm_compiler_dispose(struct ac_llvm_compiler_info *info) > +{ > + if (!info->thread_stored) > + ac_llvm_compiler_dispose_internal(info); > } > diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c > index 7f8c5746b37..d8ec4ee0092 100644 > --- a/src/amd/common/ac_llvm_util.c > +++ b/src/amd/common/ac_llvm_util.c > @@ -188,10 +188,10 @@ static LLVMPassManagerRef > ac_init_passmgr(LLVMTargetLibraryInfoRef target_librar > return passmgr; > } > > -bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info, > - bool add_target_library_info, > - enum radeon_family family, > - enum ac_target_machine_options tm_options) > +bool ac_llvm_compiler_init_internal(struct ac_llvm_compiler_info *info, > + bool add_target_library_info, > + enum radeon_family family, > + enum ac_target_machine_options tm_options) > { > memset(info, 0, sizeof(*info)); > info->tm = ac_create_target_machine(family, tm_options, > &info->triple); > @@ -223,7 +223,7 @@ fail: > return false; > } > > -void ac_llvm_compiler_dispose(struct ac_llvm_compiler_info *info) > +void ac_llvm_compiler_dispose_internal(struct ac_llvm_compiler_info *info) > { > if (info->data_layout) > LLVMDisposeMessage((char*)info->data_layout); > diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h > index 
1ba972dab87..c579e6a9ec3 100644 > --- a/src/amd/common/ac_llvm_util.h > +++ b/src/amd/common/ac_llvm_util.h > @@ -60,6 +60,7 @@ enum ac_target_machine_options { > AC_TM_FORCE_DISABLE_XNACK = (1 << 3), > AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4), > AC_TM_CHECK_IR = (1 << 5), > + AC_TM_THREAD_LLVM = (1 << 6), > }; > > enum ac_float_mode { > @@ -74,6 +75,7 @@ struct ac_llvm_compiler_info { > LLVMTargetLibraryInfoRef target_library_info; > const char *triple; > const char *data_layout; > + bool thread_stored; > }; > > const char *ac_get_llvm_processor_name(enum radeon_family family); > @@ -114,6 +116,12 @@ ac_get_store_intr_attribs(bool writeonly_memory) > unsigned > ac_count_scratch_private_memory(LLVMValueRef function); > > +bool ac_llvm_compiler_init_internal(struct ac_llvm_compiler_info *info, > + bool add_target_library_info, /* crash > workaround */ > + enum radeon_family family, > + enum ac_target_machine_options > tm_options); > +void ac_llvm_compiler_dispose_internal(struct ac_llvm_compiler_info *info); > + > bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info, > bool add_target_library_info, /* crash workaround > */ > enum radeon_family family, > @@ -127,6 +135,7 @@ bool ac_compile_to_memory_buffer(struct > ac_llvm_compiler_info *info, > LLVMModuleRef M, > char **ErrorMessage, > LLVMMemoryBufferRef *OutMemBuf); > + > #ifdef __cplusplus > } > #endif > diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h > index f1b0dc26a63..9fe4c3b7404 100644 > --- a/src/amd/vulkan/radv_debug.h > +++ b/src/amd/vulkan/radv_debug.h > @@ -49,6 +49,7 @@ enum { > RADV_DEBUG_ERRORS = 0x80000, > RADV_DEBUG_STARTUP = 0x100000, > RADV_DEBUG_CHECKIR = 0x200000, > + RADV_DEBUG_NOTHREADLLVM = 0x400000, > }; > > enum { > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c > index ad3465f594e..73c48cef1f0 100644 > --- a/src/amd/vulkan/radv_device.c > +++ b/src/amd/vulkan/radv_device.c > @@ -436,6 +436,7 @@ static const struct debug_control 
radv_debug_options[] = { > {"errors", RADV_DEBUG_ERRORS}, > {"startup", RADV_DEBUG_STARTUP}, > {"checkir", RADV_DEBUG_CHECKIR}, > + {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM}, > {NULL, 0} > }; > > diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c > index 3bbb7867640..f7033aff771 100644 > --- a/src/amd/vulkan/radv_shader.c > +++ b/src/amd/vulkan/radv_shader.c > @@ -547,6 +547,8 @@ shader_variant_create(struct radv_device *device, > tm_options |= AC_TM_SISCHED; > if (options->check_ir) > tm_options |= AC_TM_CHECK_IR; > + if (!(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM)) > + tm_options |= AC_TM_THREAD_LLVM; > > radv_init_llvm_once(); > > -- > 2.17.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev