radv: using tls to store llvm related info and speed up compiles (v3)

Bas Nieuwenhuizen Wed, 27 Jun 2018 16:25:59 -0700

On Wed, Jun 27, 2018 at 5:58 AM, Dave Airlie <airl...@gmail.com> wrote:
> From: Dave Airlie <airl...@redhat.com>
>
> I'd like to encourage people to test this to see if it helps (e.g.
> does it improve app startup, or reduce hitching in dxvk).
>
> The basic idea is to store a bunch of LLVM related data structs
> in thread local storage so we can avoid reinitializing them every time
> we compile a shader. Since we know LLVM objects aren't thread safe,
> they have to be stored using TLS to avoid any collisions.
>
> This should remove all the fixed setup overhead of creating
> the pass manager each time.
>
> This takes a demo app time to compile the radv meta shaders on nocache
> and exit from 1.7s to 1s.
>
> TODO: this doesn't work for radeonsi yet. I'm also not sure how TLS
> behaves if radeonsi and radv are loaded at the same time: if they
> magically try to use the same TLS data, this might explode all
> over the place.
>
> v2: fix llvm6 build, inline emit function, handle multiple targets
> in one thread
> v3: rebase and port onto new structure
> ---
>  src/amd/common/ac_llvm_helper.cpp | 120 ++++++++++++++++++++++++++++--
>  src/amd/common/ac_llvm_util.c     |  10 +--
>  src/amd/common/ac_llvm_util.h     |   9 +++
>  src/amd/vulkan/radv_debug.h       |   1 +
>  src/amd/vulkan/radv_device.c      |   1 +
>  src/amd/vulkan/radv_shader.c      |   2 +
>  6 files changed, 132 insertions(+), 11 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_helper.cpp 
> b/src/amd/common/ac_llvm_helper.cpp
> index 27403dbe085..f1f1399b3fb 100644
> --- a/src/amd/common/ac_llvm_helper.cpp
> +++ b/src/amd/common/ac_llvm_helper.cpp
> @@ -31,12 +31,21 @@
>
>  #include "ac_llvm_util.h"
>  #include <llvm-c/Core.h>
> -#include <llvm/Target/TargetOptions.h>
> -#include <llvm/ExecutionEngine/ExecutionEngine.h>
> -#include <llvm/IR/Attributes.h>
> -#include <llvm/IR/CallSite.h>
> +#include <llvm/Target/TargetMachine.h>
>  #include <llvm/IR/IRBuilder.h>
>  #include <llvm/Analysis/TargetLibraryInfo.h>
> +#include <llvm/IR/LegacyPassManager.h>
> +
> +#include <llvm-c/Transforms/IPO.h>
> +#include <llvm-c/Transforms/Scalar.h>
> +#if HAVE_LLVM >= 0x0700
> +#include <llvm-c/Transforms/Utils.h>
> +#endif
> +
> +#if HAVE_LLVM < 0x0700
> +#include "llvm/Support/raw_ostream.h"
> +#endif
> +#include <list>
>
>  void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
>  {
> @@ -101,11 +110,110 @@ 
> ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
>         delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
>  }
>
> +class ac_llvm_per_thread_info {
> +public:
> +       ac_llvm_per_thread_info(enum radeon_family arg_family,
> +                               enum ac_target_machine_options arg_tm_options)
> +               : family(arg_family), tm_options(arg_tm_options),
> +                 OStream(CodeString) {}
> +       ~ac_llvm_per_thread_info() {
> +               ac_llvm_compiler_dispose_internal(&llvm_info);
> +       }
> +
> +       struct ac_llvm_compiler_info llvm_info;
> +       enum radeon_family family;
> +       enum ac_target_machine_options tm_options;
> +       llvm::SmallString<0> CodeString;
> +       llvm::raw_svector_ostream OStream;


Can we keep the naming style for variables consistent?

Otherwise this series is

Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>

Thanks!

> +       llvm::legacy::PassManager pass;
> +};
> +
> +/* we have to store a linked list per thread due to the possibility of 
> multiple gpus being required */
> +static thread_local std::list<ac_llvm_per_thread_info> 
> ac_llvm_per_thread_list;
> +
>  bool ac_compile_to_memory_buffer(struct ac_llvm_compiler_info *info,
>                                  LLVMModuleRef M,
>                                  char **ErrorMessage,
>                                  LLVMMemoryBufferRef *OutMemBuf)
>  {
> -       return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, 
> LLVMObjectFile,
> -                                                  ErrorMessage, OutMemBuf);
> +       ac_llvm_per_thread_info *thread_info = nullptr;
> +       if (info->thread_stored) {
> +               for (auto &I : ac_llvm_per_thread_list) {
> +                       if (I.llvm_info.tm == info->tm) {
> +                               thread_info = &I;
> +                               break;
> +                       }
> +               }
> +
> +               if (!thread_info) {
> +                       assert(0);
> +                       return false;
> +               }
> +       } else {
> +               return LLVMTargetMachineEmitToMemoryBuffer(info->tm, M, 
> LLVMObjectFile,
> +                                                          ErrorMessage, 
> OutMemBuf);
> +       }
> +
> +       llvm::TargetMachine *TM = 
> reinterpret_cast<llvm::TargetMachine*>(thread_info->llvm_info.tm);
> +       llvm::Module *Mod = llvm::unwrap(M);
> +       llvm::StringRef Data;
> +
> +       Mod->setDataLayout(TM->createDataLayout());
> +
> +       thread_info->pass.run(*Mod);
> +
> +       Data = thread_info->OStream.str();
> +       *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), 
> Data.size(), "");
> +       thread_info->CodeString = "";
> +       return false;
> +}
> +
> +bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,
> +                          bool add_target_library_info,
> +                          enum radeon_family family,
> +                          enum ac_target_machine_options tm_options)
> +{
> +       if (tm_options & AC_TM_THREAD_LLVM) {
> +               for (auto &I : ac_llvm_per_thread_list) {
> +                       if (I.family == family &&
> +                           I.tm_options == tm_options) {
> +                               *info = I.llvm_info;
> +                               return true;
> +                       }
> +               }
> +
> +               ac_llvm_per_thread_list.emplace_back(family, tm_options);
> +               ac_llvm_per_thread_info &tinfo = 
> ac_llvm_per_thread_list.back();
> +               if (!ac_llvm_compiler_init_internal(&tinfo.llvm_info,
> +                                                   true,
> +                                                   family,
> +                                                   tm_options))
> +                       return false;
> +
> +               tinfo.llvm_info.thread_stored = true;
> +               *info = tinfo.llvm_info;
> +
> +               llvm::TargetMachine *TM = 
> reinterpret_cast<llvm::TargetMachine*>(tinfo.llvm_info.tm);
> +               if (TM->addPassesToEmitFile(tinfo.pass, tinfo.OStream,
> +#if HAVE_LLVM >= 0x0700
> +                                           nullptr,
> +#endif
> +                                       
> llvm::TargetMachine::CGFT_ObjectFile)) {
> +                       assert(0);
> +                       return false;
> +               }
> +       } else {
> +               if (!ac_llvm_compiler_init_internal(info,
> +                                                   add_target_library_info,
> +                                                   family,
> +                                                   tm_options))
> +                       return false;
> +       }
> +       return true;
> +}
> +
> +void ac_llvm_compiler_dispose(struct ac_llvm_compiler_info *info)
> +{
> +       if (!info->thread_stored)
> +               ac_llvm_compiler_dispose_internal(info);
>  }
> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
> index 7f8c5746b37..d8ec4ee0092 100644
> --- a/src/amd/common/ac_llvm_util.c
> +++ b/src/amd/common/ac_llvm_util.c
> @@ -188,10 +188,10 @@ static LLVMPassManagerRef 
> ac_init_passmgr(LLVMTargetLibraryInfoRef target_librar
>         return passmgr;
>  }
>
> -bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,
> -                          bool add_target_library_info,
> -                          enum radeon_family family,
> -                          enum ac_target_machine_options tm_options)
> +bool ac_llvm_compiler_init_internal(struct ac_llvm_compiler_info *info,
> +                                   bool add_target_library_info,
> +                                   enum radeon_family family,
> +                                   enum ac_target_machine_options tm_options)
>  {
>         memset(info, 0, sizeof(*info));
>         info->tm = ac_create_target_machine(family, tm_options, 
> &info->triple);
> @@ -223,7 +223,7 @@ fail:
>         return false;
>  }
>
> -void ac_llvm_compiler_dispose(struct ac_llvm_compiler_info *info)
> +void ac_llvm_compiler_dispose_internal(struct ac_llvm_compiler_info *info)
>  {
>         if (info->data_layout)
>                 LLVMDisposeMessage((char*)info->data_layout);
> diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
> index 1ba972dab87..c579e6a9ec3 100644
> --- a/src/amd/common/ac_llvm_util.h
> +++ b/src/amd/common/ac_llvm_util.h
> @@ -60,6 +60,7 @@ enum ac_target_machine_options {
>         AC_TM_FORCE_DISABLE_XNACK = (1 << 3),
>         AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
>         AC_TM_CHECK_IR = (1 << 5),
> +       AC_TM_THREAD_LLVM = (1 << 6),
>  };
>
>  enum ac_float_mode {
> @@ -74,6 +75,7 @@ struct ac_llvm_compiler_info {
>         LLVMTargetLibraryInfoRef target_library_info;
>         const char *triple;
>         const char *data_layout;
> +       bool thread_stored;
>  };
>
>  const char *ac_get_llvm_processor_name(enum radeon_family family);
> @@ -114,6 +116,12 @@ ac_get_store_intr_attribs(bool writeonly_memory)
>  unsigned
>  ac_count_scratch_private_memory(LLVMValueRef function);
>
> +bool ac_llvm_compiler_init_internal(struct ac_llvm_compiler_info *info,
> +                                   bool add_target_library_info, /* crash 
> workaround */
> +                                   enum radeon_family family,
> +                                   enum ac_target_machine_options 
> tm_options);
> +void ac_llvm_compiler_dispose_internal(struct ac_llvm_compiler_info *info);
> +
>  bool ac_llvm_compiler_init(struct ac_llvm_compiler_info *info,
>                            bool add_target_library_info, /* crash workaround 
> */
>                            enum radeon_family family,
> @@ -127,6 +135,7 @@ bool ac_compile_to_memory_buffer(struct 
> ac_llvm_compiler_info *info,
>                                  LLVMModuleRef M,
>                                  char **ErrorMessage,
>                                  LLVMMemoryBufferRef *OutMemBuf);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
> index f1b0dc26a63..9fe4c3b7404 100644
> --- a/src/amd/vulkan/radv_debug.h
> +++ b/src/amd/vulkan/radv_debug.h
> @@ -49,6 +49,7 @@ enum {
>         RADV_DEBUG_ERRORS            = 0x80000,
>         RADV_DEBUG_STARTUP           = 0x100000,
>         RADV_DEBUG_CHECKIR           = 0x200000,
> +       RADV_DEBUG_NOTHREADLLVM      = 0x400000,
>  };
>
>  enum {
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index ad3465f594e..73c48cef1f0 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -436,6 +436,7 @@ static const struct debug_control radv_debug_options[] = {
>         {"errors", RADV_DEBUG_ERRORS},
>         {"startup", RADV_DEBUG_STARTUP},
>         {"checkir", RADV_DEBUG_CHECKIR},
> +       {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
>         {NULL, 0}
>  };
>
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 3bbb7867640..f7033aff771 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -547,6 +547,8 @@ shader_variant_create(struct radv_device *device,
>                 tm_options |= AC_TM_SISCHED;
>         if (options->check_ir)
>                 tm_options |= AC_TM_CHECK_IR;
> +       if (!(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM))
> +               tm_options |= AC_TM_THREAD_LLVM;
>
>         radv_init_llvm_once();
>
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/11] ac/radv: using tls to store llvm related info and speed up compiles (v3)

Reply via email to