https://github.com/ajwock created https://github.com/llvm/llvm-project/pull/95494
LLVM can now generate increments to counters in thread local storage. Use a new compiler-rt runtime to atomically add thread local counters to global counters on thread exit. The clang driver will link the new runtime libraries in when the new option -fprofile-thread-local is specified. More details available in the RFC on discourse. >From 44e2159636efd601c90aced44856d17d77728caa Mon Sep 17 00:00:00 2001 From: Andrew Wock <ajw...@gmail.com> Date: Tue, 4 Jun 2024 09:45:31 -0400 Subject: [PATCH] Created Thread local counter instrumentation. LLVM can now generate increments to counters in thread local storage. Use a new compiler-rt runtime to atomically add thread local counters to global counters on thread exit. The clang driver will link the new runtime libraries in when the new option -fprofile-thread-local is specified. Signed-off-by: Andrew Wock <ajw...@gmail.com> --- clang/docs/UsersManual.rst | 8 ++ clang/include/clang/Basic/CodeGenOptions.def | 1 + clang/include/clang/Driver/Options.td | 3 + clang/include/clang/Driver/ToolChain.h | 6 + clang/lib/Driver/ToolChain.cpp | 10 ++ clang/lib/Driver/ToolChains/Clang.cpp | 12 ++ clang/lib/Driver/ToolChains/Linux.cpp | 7 + compiler-rt/include/profile/InstrProfData.inc | 4 + compiler-rt/lib/profile/CMakeLists.txt | 35 +++++ .../lib/profile/InstrProfilingDyLibLinux.cpp | 63 +++++++++ compiler-rt/lib/profile/InstrProfilingFile.c | 6 + .../lib/profile/InstrProfilingPlatformLinux.c | 1 + .../profile/InstrProfilingStaticTLSLinux.cpp | 123 ++++++++++++++++++ compiler-rt/lib/profile/InstrProfilingTLS.c | 29 +++++ compiler-rt/lib/profile/InstrProfilingTLS.h | 39 ++++++ .../lib/profile/InstrProfilingTLSDyLib.c | 100 ++++++++++++++ .../lib/profile/InstrProfilingTLSDyLib.h | 4 + compiler-rt/lib/tsan/rtl/CMakeLists.txt | 2 +- .../Inputs/instrprof-tls-dlclose-lib.c | 7 + .../Inputs/instrprof-tls-dlclose-main.c | 93 +++++++++++++ .../Inputs/instrprof-tls-dlopen-func.c | 9 ++ .../Inputs/instrprof-tls-dlopen-func2.c | 9 ++ 
.../Inputs/instrprof-tls-dlopen-main.c | 105 +++++++++++++++ .../test/profile/Inputs/instrprof-tls-exit.c | 37 ++++++ .../Linux/instrprof-tls-dlclose-memfault.test | 27 ++++ .../instrprof-tls-dlclose-mix-subset.test | 41 ++++++ .../Linux/instrprof-tls-dlclose-mix.test | 48 +++++++ .../Linux/instrprof-tls-dlclose-nodelete.test | 24 ++++ .../profile/Linux/instrprof-tls-dlopen.test | 32 +++++ .../profile/Linux/instrprof-tls-exit.test | 17 +++ .../Linux/instrprof-tls-noclose-mix.test | 51 ++++++++ .../instrprof-tls-shared-mix-subset.test | 35 +++++ .../Linux/instrprof-tls-shared-mix.test | 48 +++++++ llvm/include/llvm/ProfileData/InstrProf.h | 3 + .../llvm/ProfileData/InstrProfData.inc | 4 + .../Instrumentation/InstrProfiling.cpp | 71 +++++++++- 36 files changed, 1110 insertions(+), 4 deletions(-) create mode 100644 compiler-rt/lib/profile/InstrProfilingDyLibLinux.cpp create mode 100644 compiler-rt/lib/profile/InstrProfilingStaticTLSLinux.cpp create mode 100644 compiler-rt/lib/profile/InstrProfilingTLS.c create mode 100644 compiler-rt/lib/profile/InstrProfilingTLS.h create mode 100644 compiler-rt/lib/profile/InstrProfilingTLSDyLib.c create mode 100644 compiler-rt/lib/profile/InstrProfilingTLSDyLib.h create mode 100644 compiler-rt/test/profile/Inputs/instrprof-tls-dlclose-lib.c create mode 100644 compiler-rt/test/profile/Inputs/instrprof-tls-dlclose-main.c create mode 100644 compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-func.c create mode 100644 compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-func2.c create mode 100644 compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-main.c create mode 100644 compiler-rt/test/profile/Inputs/instrprof-tls-exit.c create mode 100644 compiler-rt/test/profile/Linux/instrprof-tls-dlclose-memfault.test create mode 100644 compiler-rt/test/profile/Linux/instrprof-tls-dlclose-mix-subset.test create mode 100644 compiler-rt/test/profile/Linux/instrprof-tls-dlclose-mix.test create mode 100644 
compiler-rt/test/profile/Linux/instrprof-tls-dlclose-nodelete.test create mode 100644 compiler-rt/test/profile/Linux/instrprof-tls-dlopen.test create mode 100644 compiler-rt/test/profile/Linux/instrprof-tls-exit.test create mode 100644 compiler-rt/test/profile/Linux/instrprof-tls-noclose-mix.test create mode 100644 compiler-rt/test/profile/Linux/instrprof-tls-shared-mix-subset.test create mode 100644 compiler-rt/test/profile/Linux/instrprof-tls-shared-mix.test diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index f954857b0235a..f7db513b92909 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2932,6 +2932,14 @@ indexed format, regardeless whether it is produced by frontend or the IR pass. overhead. ``prefer-atomic`` will be transformed to ``atomic`` when supported by the target, or ``single`` otherwise. +.. option:: -fprofile-thread-local + + Increment profile counters in thread local storage and atomically add their + values to global counters on thread exit. This has the potential to deliver + both accuracy and high performance whenever there is high thread contention + on profile counters. This is an experimental option and it is only supported + on 64-bit linux. + Fine Tuning Profile Collection ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 7ffc40a00504f..7cd0bfb6d71b5 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -218,6 +218,7 @@ ENUM_CODEGENOPT(ProfileUse, ProfileInstrKind, 2, ProfileNone) /// instrumented. Selected group numbers can be 0 to N-1 inclusive. 
VALUE_CODEGENOPT(ProfileTotalFunctionGroups, 32, 1) VALUE_CODEGENOPT(ProfileSelectedFunctionGroup, 32, 0) +CODEGENOPT(InstrProfileThreadLocal, 1, 0) ///< Counters are updated on a per-thread basis CODEGENOPT(CoverageMapping , 1, 0) ///< Generate coverage mapping regions to ///< enable code coverage analysis. CODEGENOPT(DumpCoverageMapping , 1, 0) ///< Dump the generated coverage mapping diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d44faa55c456f..aab5b63c991f1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1768,6 +1768,9 @@ def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">, def fprofile_instr_generate_EQ : Joined<["-"], "fprofile-instr-generate=">, Group<f_Group>, Visibility<[ClangOption, CLOption]>, MetaVarName<"<file>">, HelpText<"Generate instrumented code to collect execution counts into <file> (overridden by LLVM_PROFILE_FILE env var)">; +def fprofile_thread_local : Flag<["-"], "fprofile-thread-local">, + Group<f_Group>, Visibility<[ClangOption, CLOption]>, + HelpText<"Generate profile counters in thread local storage">; def fprofile_instr_use : Flag<["-"], "fprofile-instr-use">, Group<f_Group>, Visibility<[ClangOption, CLOption]>; def fprofile_instr_use_EQ : Joined<["-"], "fprofile-instr-use=">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 9789cfacafd78..162c730782afb 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -752,6 +752,12 @@ class ToolChain { virtual void addProfileRTLibs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + /// addThreadLocalProfileRTLibs - With -fprofile-thread-local, add the + /// threadlocal profile runtime static + shared library pair. 
+ virtual void + addThreadLocalProfileRTLibs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + /// Add arguments to use system-specific CUDA includes. virtual void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 40ab2e91125d1..4708cb7df5044 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1078,6 +1078,16 @@ void ToolChain::addProfileRTLibs(const llvm::opt::ArgList &Args, CmdArgs.push_back(getCompilerRTArgString(Args, "profile")); } +void ToolChain::addThreadLocalProfileRTLibs( + const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const { + if (needsProfileRT(Args) && Args.hasArg(options::OPT_fprofile_thread_local)) { + // Static first, so we can specify '-u' where needed + CmdArgs.push_back(getCompilerRTArgString(Args, "profile_threadlocal")); + CmdArgs.push_back(getCompilerRTArgString(Args, "profile_threadlocal", + ToolChain::FT_Shared)); + } +} + ToolChain::RuntimeLibType ToolChain::GetRuntimeLibType( const ArgList &Args) const { if (runtimeLibType) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b8d8ff3db5d1f..cd63ac56fecf6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -720,6 +720,18 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, CmdArgs.push_back("-fcoverage-mcdc"); } + if (Args.hasArg(options::OPT_fprofile_thread_local)) { + if (!ProfileGenerateArg) + D.Diag(clang::diag::err_drv_argument_only_allowed_with) + << "-fprofile-thread-local" + << "-fprofile-instr-generate"; + + // Clang cc1 is not in the know about thread local coverage, but llvm + // should be + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-instr-prof-thread-local"); + } + if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ, 
options::OPT_fcoverage_compilation_dir_EQ)) { if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ)) diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 2222dea431c3c..0a889f957786a 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -843,6 +843,13 @@ void Linux::addProfileRTLibs(const llvm::opt::ArgList &Args, CmdArgs.push_back(Args.MakeArgString( Twine("-u", llvm::getInstrProfRuntimeHookVarName()))); ToolChain::addProfileRTLibs(Args, CmdArgs); + + if (needsProfileRT(Args) && Args.hasArg(options::OPT_fprofile_thread_local)) { + CmdArgs.push_back(Args.MakeArgString(Twine( + "-u", + llvm::StringRef("__llvm_profile_tls_register_thread_exit_handler")))); + } + ToolChain::addThreadLocalProfileRTLibs(Args, CmdArgs); } void Linux::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index e9866d94b762c..8655bcf498437 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -312,6 +312,9 @@ INSTR_PROF_SECT_ENTRY(IPSK_data, \ INSTR_PROF_SECT_ENTRY(IPSK_cnts, \ INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \ INSTR_PROF_CNTS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_tls_cnts, \ + INSTR_PROF_QUOTE(INSTR_PROF_TLS_CNTS_COMMON), \ + INSTR_PROF_CNTS_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \ INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON), \ INSTR_PROF_BITS_COFF, "__DATA,") @@ -750,6 +753,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_NAME_COMMON __llvm_prf_names #define INSTR_PROF_VNAME_COMMON __llvm_prf_vns #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts +#define INSTR_PROF_TLS_CNTS_COMMON __llvm_tls_prf_cnts #define INSTR_PROF_BITS_COMMON __llvm_prf_bits #define INSTR_PROF_VALS_COMMON __llvm_prf_vals #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds diff --git 
a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt index 45e5164891751..b9f3a20bb328d 100644 --- a/compiler-rt/lib/profile/CMakeLists.txt +++ b/compiler-rt/lib/profile/CMakeLists.txt @@ -70,14 +70,25 @@ set(PROFILE_SOURCES InstrProfilingUtil.c ) +set(PROFILE_STATIC_TLS_SOURCES + InstrProfilingTLS.c + InstrProfilingStaticTLSLinux.cpp) + +set(PROFILE_SHARED_TLS_SOURCES + InstrProfilingTLSDyLib.c + InstrProfilingDyLibLinux.cpp) + set(PROFILE_HEADERS InstrProfiling.h InstrProfilingInternal.h InstrProfilingPort.h InstrProfilingUtil.h + InstrProfilingTLS.h WindowsMMap.h ) +set(PROFILE_LINK_LIBS ${SANITIZER_COMMON_LINK_LIBS}) + if(WIN32) list(APPEND PROFILE_SOURCES WindowsMMap.c @@ -134,6 +145,30 @@ if(APPLE) ADDITIONAL_HEADERS ${PROFILE_HEADERS} PARENT_TARGET profile) else() + #if(UNIX AND NOT APPLE AND NOT ANDROID) + if(OS_NAME MATCHES "Linux") + add_compiler_rt_runtime(clang_rt.profile_threadlocal + STATIC + OS ${PROFILE_SUPPORTED_OS} + ARCHS ${PROFILE_SUPPORTED_ARCH} + CFLAGS ${EXTRA_FLAGS} + SOURCES ${PROFILE_STATIC_TLS_SOURCES} + ADDITIONAL_HEADERS ${PROFILE_HEADERS} + PARENT_TARGET profile) + + add_compiler_rt_runtime(clang_rt.profile_threadlocal + SHARED + OS ${PROFILE_SUPPORTED_OS} + ARCHS ${PROFILE_SUPPORTED_ARCH} + CFLAGS ${EXTRA_FLAGS} + SOURCES ${PROFILE_SHARED_TLS_SOURCES} + ADDITIONAL_HEADERS ${PROFILE_HEADERS} + OBJECT_LIBS RTInterception + RTSanitizerCommon + RTSanitizerCommonLibc + PARENT_TARGET profile) + endif() + add_compiler_rt_runtime(clang_rt.profile STATIC ARCHS ${PROFILE_SUPPORTED_ARCH} diff --git a/compiler-rt/lib/profile/InstrProfilingDyLibLinux.cpp b/compiler-rt/lib/profile/InstrProfilingDyLibLinux.cpp new file mode 100644 index 0000000000000..47f2baa6a5815 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingDyLibLinux.cpp @@ -0,0 +1,63 @@ +#if defined(__linux__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + (defined(__sun__) && defined(__svr4__)) || defined(__NetBSD__) || \ + defined(_AIX) + 
+#include <elf.h> +#include <link.h> +#endif +#include <stdlib.h> +#include <string.h> + +extern "C" { + +#include "InstrProfiling.h" +#include "InstrProfilingInternal.h" +#include "InstrProfilingTLS.h" +#include "InstrProfilingTLSDyLib.h" +} + +#include "interception/interception.h" + +extern "C" { + +// Hand-off record passed from the pthread_create interceptor to the new +// thread. It lives on the creating thread's stack, so the creator must not +// return until the child has cleared arg_keepalive. +struct pthread_wrapper_arg { + void *(*fn)(void *); + void *arg; + uint32_t arg_keepalive; +}; + +// Thread entry trampoline: copies fn/arg out of the creator's record, clears +// arg_keepalive to release the creator, runs the user's start routine, then +// runs the registered thread exit handlers. +void *pthread_fn_wrapper(void *arg_ptr) { + struct pthread_wrapper_arg *wrapper_arg = + (struct pthread_wrapper_arg *)arg_ptr; + void *(*fn)(void *) = __atomic_load_n(&wrapper_arg->fn, __ATOMIC_RELAXED); + void *arg = __atomic_load_n(&wrapper_arg->arg, __ATOMIC_RELAXED); + __atomic_store_n(&wrapper_arg->arg_keepalive, 0, __ATOMIC_RELEASE); + + // startup + // Do nothing (TLS is automatically loaded and zeroed) + void *retval = fn(arg); + // cleanup + run_thread_exit_handlers(); + // Combine counters with main counters + return retval; +} + +void __llvm_register_profile_intercepts() { register_profile_intercepts(); } + +} // end extern "C" + +INTERCEPTOR(int, pthread_create, void *thread, void *attr, + void *(*start_routine)(void *), void *arg) { + int res = -1; + struct pthread_wrapper_arg wrapper_arg = {(void *(*)(void *))start_routine, + arg, 1}; + + // do pthread + res = REAL(pthread_create)(thread, attr, pthread_fn_wrapper, &wrapper_arg); + // Spin wait for child thread to copy arguments. Only wait when the thread + // was actually created: on failure no child exists to clear arg_keepalive, + // and spinning here would never terminate. + while (res == 0 && + __atomic_load_n(&wrapper_arg.arg_keepalive, __ATOMIC_ACQUIRE) == 1) + ; + return res; +} + +void register_profile_intercepts() { INTERCEPT_FUNCTION(pthread_create); } diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index e4d99ef4872bd..64775f24fd83c 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -34,6 +34,7 @@ #include "InstrProfiling.h" #include "InstrProfilingInternal.h" #include "InstrProfilingPort.h" +#include "InstrProfilingTLS.h" #include 
"InstrProfilingUtil.h" /* From where is profile name specified. @@ -1084,6 +1085,8 @@ void __llvm_profile_set_filename(const char *FilenamePat) { parseAndSetFilename(FilenamePat, PNS_runtime_api, 1); } +void (*on_main_thread_exit)(void) = NULL; + /* The public API for writing profile data into the file with name * set by previous calls to __llvm_profile_set_filename or * __llvm_profile_override_default_filename or @@ -1097,6 +1100,9 @@ int __llvm_profile_write_file(void) { // Temporarily suspend getting SIGKILL when the parent exits. int PDeathSig = lprofSuspendSigKill(); + if (on_main_thread_exit) + on_main_thread_exit(); + if (lprofProfileDumped() || __llvm_profile_is_continuous_mode_enabled()) { PROF_NOTE("Profile data not written to file: %s.\n", "already written"); if (PDeathSig == 1) diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index b766436497b74..4f96523a56a37 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -45,6 +45,7 @@ extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; + extern VTableProfData PROF_VTABLE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern VTableProfData PROF_VTABLE_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_VNAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; diff --git a/compiler-rt/lib/profile/InstrProfilingStaticTLSLinux.cpp b/compiler-rt/lib/profile/InstrProfilingStaticTLSLinux.cpp new file mode 100644 index 0000000000000..fc5f785e1ab40 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingStaticTLSLinux.cpp @@ -0,0 +1,123 @@ +#if defined(__linux__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + (defined(__sun__) && defined(__svr4__)) || defined(__NetBSD__) || \ + defined(_AIX) 
+ +#include <elf.h> +#include <link.h> +#endif +#include <stdlib.h> +#include <string.h> + +extern "C" { + +#include "InstrProfiling.h" +#include "InstrProfilingInternal.h" +#include "InstrProfilingTLS.h" +} + +extern "C" { + +#define PROF_TLS_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_TLS_CNTS_COMMON) +#define PROF_TLS_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_TLS_CNTS_COMMON) + +extern char PROF_TLS_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; +extern char PROF_TLS_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; + +COMPILER_RT_VISIBILITY char *__llvm_profile_begin_tls_counters(void) { + return &PROF_TLS_CNTS_START; +} +COMPILER_RT_VISIBILITY char *__llvm_profile_end_tls_counters(void) { + return &PROF_TLS_CNTS_STOP; +} + +struct finalization_data { + char *mod_begin; + char *tls_img_begin; + char *tls_img_end; + char *cnts_begin; + char *cnts_end; +}; + +// This is O(num_modules + num_counters) unfortunately. If there were a +// mechanism to calculate the thread-local start of a thread-local section like +// there is a mechanism to calculate the static start of a static section (i.e. +// __start_$sectionname), that would simplify implementation a lot and make this +// just O(num_counters). 
+static int FindAndAddCounters_cb(struct dl_phdr_info *info, size_t size, + void *data) { + finalization_data *fdata = (finalization_data *)data; + char *mod_begin = fdata->mod_begin; + // We're looking for a match to the dladdr calculated based on PROF_CNTS_START + if (mod_begin != (char *)info->dlpi_addr) { + return 0; + } + + if (info->dlpi_tls_data == NULL) { + return 1; + } + + const Elf64_Phdr *hdr = info->dlpi_phdr; + const Elf64_Phdr *last_hdr = hdr + info->dlpi_phnum; + + const Elf64_Phdr *tls_hdr; + for (; hdr != last_hdr; ++hdr) { + if (hdr->p_type == PT_TLS) { + tls_hdr = hdr; + goto found_tls_ph; + } + } + return 1; +found_tls_ph: + uint64_t num_counters = + __llvm_profile_get_num_counters(fdata->tls_img_begin, fdata->tls_img_end); + uint64_t counter_size = __llvm_profile_counter_entry_size(); + + // Calculate the offset of __llvm_tls_prf_cnts into the tls block for this + // module. The addresses in use below correspond to the tls initialization + // image, which is statically allocated for the module, rather than the TLS + // block itself. + uint64_t ph_true_vaddr = + (uint64_t)info->dlpi_addr + (uint64_t)tls_hdr->p_vaddr; + uint64_t tls_cnts_tlsblk_offset = + (uint64_t)fdata->tls_img_begin - ph_true_vaddr; + + // Calculate the thread local copy of __llvm_tls_prf_cnts for this module. + uint64_t tls_prf_cnts_modlocal_begin = + (uint64_t)info->dlpi_tls_data + tls_cnts_tlsblk_offset; + + // We don't support single byte counters because they are also resilient to + // thread synchronization issues and they are designed to avoid memory + // overhead, which is the opposite of what TL counters do. + // TODO: warn? 
+ if (counter_size == sizeof(uint64_t)) { + uint64_t *tls_cnt = (uint64_t *)tls_prf_cnts_modlocal_begin; + uint64_t *tls_end = (uint64_t *)tls_cnt + num_counters; + uint64_t *cnt = (uint64_t *)fdata->cnts_begin; + for (; tls_cnt != tls_end; tls_cnt++, cnt++) { + __atomic_fetch_add(cnt, *tls_cnt, __ATOMIC_RELAXED); + } + } + return 1; +} + +COMPILER_RT_VISIBILITY +void __llvm_profile_tls_counters_finalize(void) { + struct finalization_data fdata = {0}; + fdata.tls_img_begin = __llvm_profile_begin_tls_counters(); + fdata.tls_img_end = __llvm_profile_end_tls_counters(); + fdata.cnts_begin = __llvm_profile_begin_counters(); + fdata.cnts_end = __llvm_profile_end_counters(); + + if (!fdata.tls_img_begin || !fdata.tls_img_end || !fdata.cnts_begin || + !fdata.cnts_end) { + return; + } + + Dl_info info; + if (dladdr(fdata.cnts_begin, &info) == 0) { + return; + } + fdata.mod_begin = (char *)info.dli_fbase; + dl_iterate_phdr(FindAndAddCounters_cb, &fdata); +} +} diff --git a/compiler-rt/lib/profile/InstrProfilingTLS.c b/compiler-rt/lib/profile/InstrProfilingTLS.c new file mode 100644 index 0000000000000..029ed9e542e5a --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingTLS.c @@ -0,0 +1,29 @@ +#include "InstrProfilingTLS.h" +#include "InstrProfiling.h" + +struct texit_fn_node module_node COMPILER_RT_VISIBILITY; + +// We act as a shim between the profile_threadlocal sharedlib +// and the profile static lib. We need to tell the static lib +// to add all of the counters up on main thread exit, but the +// shared lib is the one who knows how to do that and whether it's +// already been done. +// +// In the constructor we pass flush_main_thread_counters from the +// sharedlib to the non-tls statlib's on_main_thread_exit fnptr. 
+extern void flush_main_thread_counters(void); +extern void (*on_main_thread_exit)(void); + +__attribute__((constructor)) COMPILER_RT_VISIBILITY void +__llvm_profile_tls_register_thread_exit_handler(void) { + module_node.prev = NULL; + module_node.next = NULL; + module_node.fn = __llvm_profile_tls_counters_finalize; + register_tls_prfcnts_module_thread_exit_handler(&module_node); + if (!on_main_thread_exit) { + on_main_thread_exit = flush_main_thread_counters; + } +} + +// TODO: Add destructor +// (But not yet, I'm scared) diff --git a/compiler-rt/lib/profile/InstrProfilingTLS.h b/compiler-rt/lib/profile/InstrProfilingTLS.h new file mode 100644 index 0000000000000..1b6001d27d375 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingTLS.h @@ -0,0 +1,39 @@ +#ifndef INSTR_PROFILING_TLS_H +#define INSTR_PROFILING_TLS_H + +char *__llvm_profile_begin_tls_counters(void); +char *__llvm_profile_end_tls_counters(void); + +/*! + * \brief Add counter values from TLS to the global counters for the program + * + * On thread exit, atomically add the values in TLS counters to the static + * counters for the whole process. 
+ */ +void __llvm_profile_tls_counters_finalize(void); + +/* + * Dylib stuff + */ +typedef void (*texit_fnc)(void); + +typedef struct texit_fn_node { + struct texit_fn_node *prev; + texit_fnc fn; + struct texit_fn_node *next; +} texit_fn_node; + +// TODO: really this should be write-preferring rwlocked +struct texit_fn_registry { + int texit_mtx; + texit_fn_node head; + texit_fn_node tail; +}; + +void register_tls_prfcnts_module_thread_exit_handler(texit_fn_node *new_node); +void unregister_tls_prfcnts_module_thread_exit_handler(texit_fn_node *new_node); +void run_thread_exit_handlers(void); + +void register_profile_intercepts(); + +#endif diff --git a/compiler-rt/lib/profile/InstrProfilingTLSDyLib.c b/compiler-rt/lib/profile/InstrProfilingTLSDyLib.c new file mode 100644 index 0000000000000..e82780dbcf6ab --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingTLSDyLib.c @@ -0,0 +1,100 @@ +#include "InstrProfiling.h" +#include "InstrProfilingTLS.h" +#include <stdlib.h> + +// Maintain a linked list of handlers to run on thread exit. +// This is broken out into a dylib so that the registry is truly global across +// dlopen et. al. +// +// Each module has a statically allocated node that gets linked into the +// registry on the constructor and that gets linked out of the registry on +// destroy. +// +// This node is defined in the static portion of the tls counts extension. 
+ +struct texit_fn_registry texit_registry; + +static void lock_texit_registry(void) { + int expected = 0; + while (!__atomic_compare_exchange_n(&texit_registry.texit_mtx, &expected, 1, + 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { + expected = 0; + } +} + +static void unlock_texit_registry(void) { + __atomic_store_n(&texit_registry.texit_mtx, 0, __ATOMIC_RELEASE); +} + +static void wlock_texit_registry(void) { lock_texit_registry(); } + +static void wunlock_texit_registry(void) { unlock_texit_registry(); } + +static void rlock_texit_registry(void) { lock_texit_registry(); } + +static void runlock_texit_registry(void) { unlock_texit_registry(); } + +static inline texit_fn_node *take_nodep(texit_fn_node **nodepp) { + texit_fn_node *nodep = *nodepp; + *nodepp = NULL; + return nodep; +} + +static inline texit_fn_node *replace_nodep(texit_fn_node **nodepp, + texit_fn_node *new_nodep) { + texit_fn_node *nodep = *nodepp; + *nodepp = new_nodep; + return nodep; +} + +void flush_main_thread_counters(void) { + static int flushed = 0; + if (!flushed) { + run_thread_exit_handlers(); + flushed = 1; + } +} + +__attribute__((constructor)) static void __initialize_tls_exit_registry() { + register_profile_intercepts(); + texit_registry.texit_mtx = 0; + texit_registry.head.prev = NULL; + texit_registry.head.fn = NULL; + texit_registry.head.next = &texit_registry.tail; + texit_registry.tail.prev = &texit_registry.head; + texit_registry.tail.fn = NULL; + texit_registry.tail.next = NULL; +} + +// Should run from module constructor +void register_tls_prfcnts_module_thread_exit_handler(texit_fn_node *new_nodep) { + wlock_texit_registry(); + texit_fn_node *prev = replace_nodep(&texit_registry.tail.prev, new_nodep); + texit_fn_node *next = replace_nodep(&prev->next, new_nodep); + new_nodep->next = next; + new_nodep->prev = prev; + wunlock_texit_registry(); +} + +// Should run from module destructor +// Also, this destructor/constructor pair should be outermost. 
At least outside +// of the regular llvm_profile stuff. +void unregister_tls_prfcnts_module_thread_exit_handler( + texit_fn_node *old_nodep) { + wlock_texit_registry(); + texit_fn_node *prev = take_nodep(&old_nodep->prev); + texit_fn_node *next = take_nodep(&old_nodep->next); + prev->next = next; + next->prev = prev; + wunlock_texit_registry(); +} + +void run_thread_exit_handlers(void) { + rlock_texit_registry(); + for (texit_fn_node *node = texit_registry.head.next; + node != &texit_registry.tail; node = node->next) { + if (node->fn != NULL) + node->fn(); + } + runlock_texit_registry(); +} diff --git a/compiler-rt/lib/profile/InstrProfilingTLSDyLib.h b/compiler-rt/lib/profile/InstrProfilingTLSDyLib.h new file mode 100644 index 0000000000000..3c429d81129ec --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingTLSDyLib.h @@ -0,0 +1,4 @@ +#ifndef INSTR_PROFILING_TLS_DYLIB_H +#define INSTR_PROFILING_TLS_DYLIB_H + +#endif diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt index f40e72dbde1f9..8ddb6af279284 100644 --- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt +++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt @@ -1,6 +1,6 @@ include_directories(../..) 
-set(TSAN_RTL_CFLAGS ${TSAN_CFLAGS}) +set(TSAN_RTL_CFLAGS ${TSAN_CFLAGS} -O0 -g3) append_list_if(COMPILER_RT_HAS_MSSE4_2_FLAG -msse4.2 TSAN_RTL_CFLAGS) append_list_if(SANITIZER_LIMIT_FRAME_SIZE -Wframe-larger-than=530 TSAN_RTL_CFLAGS) diff --git a/compiler-rt/test/profile/Inputs/instrprof-tls-dlclose-lib.c b/compiler-rt/test/profile/Inputs/instrprof-tls-dlclose-lib.c new file mode 100644 index 0000000000000..fcf874000aa8e --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-tls-dlclose-lib.c @@ -0,0 +1,7 @@ +unsigned char determine_value_dyn(unsigned char c) { + if (c < 0x80) { + return c; + } else { + return -c; + } +} diff --git a/compiler-rt/test/profile/Inputs/instrprof-tls-dlclose-main.c b/compiler-rt/test/profile/Inputs/instrprof-tls-dlclose-main.c new file mode 100644 index 0000000000000..309d405430af4 --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-tls-dlclose-main.c @@ -0,0 +1,93 @@ +#include <dlfcn.h> +#include <pthread.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +struct thread_arg { + uint64_t buf_size; + char const *buf; + uint64_t iteration_counter; + uint64_t output; +}; + +#ifndef DLOPEN_FUNC_DIR +unsigned char determine_value_dyn(unsigned char); +#endif + +void *thread_fn(void *arg_ptr) { +#ifdef DLOPEN_FUNC_DIR + + unsigned char (*determine_value_dyn)(unsigned char) = NULL; + + const char *dynlib_name = DLOPEN_FUNC_DIR "/lib.shared"; + const char *dynlib_sym = "determine_value_dyn"; + void *handle = dlopen(dynlib_name, DLOPEN_FLAGS); + if (handle == NULL) { + fprintf(stderr, "dlopen error on: %s: %s\n", dynlib_name, dlerror()); + exit(EXIT_FAILURE); + } + + determine_value_dyn = dlsym(handle, dynlib_sym); + // Validate the looked-up symbol itself; the handle was already checked. + if (determine_value_dyn == NULL) { + fprintf(stderr, "dlsym error on: %s : %s\n", dynlib_name, dynlib_sym); + exit(EXIT_FAILURE); + } +#endif + + struct thread_arg *arg = (struct thread_arg *)arg_ptr; + for (uint64_t i = 0; i < arg->buf_size; i++) { + unsigned char c = (unsigned char)arg->buf[i]; + arg->output += 
determine_value_dyn(c); + arg->iteration_counter++; + } + + // This should unload the thread local counters region for this module, + // causing an expected failure for -fprofile-thread-local +#ifdef DLOPEN_FUNC_DIR +# ifndef DONT_CLOSE + dlclose(handle); +# endif +#endif + return NULL; +} + +int main() { + const uint64_t len = 40000; + + char *example_string = (char *)malloc(sizeof(char) * len); + int high = 0; + for (uint64_t i = 0; i < len; i++) { + if (high == 2) { + example_string[i] = 0xff; + high = 0; + } else { + example_string[i] = 0x0; + high++; + } + } + + pthread_t thread; + struct thread_arg arg = { + len, + example_string, + 0, + 0, + }; + // A thread that could not be spawned means nothing was exercised, so the + // process must report failure rather than let the test pass vacuously. + if (pthread_create(&thread, NULL, thread_fn, &arg) != 0) { + fprintf(stderr, "Failed to spawn thread, exiting\n"); + exit(EXIT_FAILURE); + } + + if (pthread_join(thread, NULL) != 0) { + fprintf(stderr, "Failed to join thread, continuing\n"); + return EXIT_FAILURE; + } + + printf("Thread output:\n" + "iteration_counter: %lu\n" + "output: %lx\n\n", + arg.iteration_counter, arg.output); + + return EXIT_SUCCESS; +} diff --git a/compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-func.c b/compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-func.c new file mode 100644 index 0000000000000..9ec903ab4c17a --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-func.c @@ -0,0 +1,9 @@ +#include <stdint.h> + +int8_t func(int8_t input) { + if (input < 0) { + return input; + } else { + return -input; + } +} diff --git a/compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-func2.c b/compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-func2.c new file mode 100644 index 0000000000000..94122d793a6ee --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-func2.c @@ -0,0 +1,9 @@ +#include <stdint.h> + +int8_t func2(int8_t input) { + if (input >= 0) { + return -1; + } else { + return 1; + } +} diff --git a/compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-main.c 
b/compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-main.c new file mode 100644 index 0000000000000..fc436841d233c --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-main.c @@ -0,0 +1,105 @@ +#include <pthread.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#ifdef DLOPEN_FUNC_DIR +# include <dlfcn.h> +int8_t (*func)(int8_t) = NULL; +int8_t (*func2)(int8_t) = NULL; +#else +int8_t func(int8_t); +int8_t func2(int8_t); +#endif + +struct thread_arg { + uint64_t buf_size; + char const *buf; + uint64_t output; +}; + +void *thread_fn(void *arg_ptr) { + struct thread_arg *arg = (struct thread_arg *)arg_ptr; + for (uint64_t i = 0; i < arg->buf_size; i++) { + int8_t c = (int8_t)arg->buf[i]; + arg->output += func(c); + arg->output += func2(c); + } + return NULL; +} + +int main() { +#define n_threads 10 +#define len 40000 + +#ifdef DLOPEN_FUNC_DIR + const char *dynlib_path = DLOPEN_FUNC_DIR "/func.shared"; + const char *dynlib_sym = "func"; + void *handle = dlopen(dynlib_path, RTLD_LAZY); + if (handle == NULL) { + fprintf(stderr, "dlopen error on: %s: %s\n", dynlib_path, dlerror()); + return EXIT_FAILURE; + } + + func = dlsym(handle, dynlib_sym); + if (func == NULL) { + fprintf(stderr, "dlsym error on: %s : %s\n", dynlib_path, dynlib_sym); + return EXIT_FAILURE; + } + + const char *dynlib_path2 = DLOPEN_FUNC_DIR "/func2.shared"; + const char *dynlib_sym2 = "func2"; + void *handle2 = dlopen(dynlib_path2, RTLD_LAZY); + if (handle2 == NULL) { + fprintf(stderr, "dlopen error on: %s: %s\n", dynlib_path2, dlerror()); + return EXIT_FAILURE; + } + + func2 = dlsym(handle2, dynlib_sym2); + if (func2 == NULL) { + fprintf(stderr, "dlsym error on: %s : %s\n", dynlib_path2, dynlib_sym2); + return EXIT_FAILURE; + } +#endif + + pthread_t threads[n_threads] = {0}; + struct thread_arg args[n_threads] = {0}; + char *example_string = (char *)malloc(sizeof(char) * len); + int high = 0; + for (uint64_t i = 0; i < len; i++) { + if (high == 2) { + 
example_string[i] = 0xff; + high = 0; + } else { + example_string[i] = 0x0; + high++; + } + } + + for (uint64_t i = 0; i < n_threads; i++) { + struct thread_arg a = { + len, + example_string, + 0, + }; + args[i] = a; + if (pthread_create(&threads[i], NULL, thread_fn, &args[i]) != 0) { + fprintf(stderr, "Failed to spawn thread %lu, exiting\n", i); + return EXIT_FAILURE; + } + } + + int rc = EXIT_SUCCESS; + for (uint64_t i = 0; i < n_threads; i++) { + void *retval = NULL; + if (pthread_join(threads[i], &retval) != 0) { + printf("Failed to join thread %lu, continuing\n", i); + rc = EXIT_FAILURE; + } + + printf("Thread %lu output:\n" + "output: %lx\n\n", + i, args[i].output); + } + return rc; +} diff --git a/compiler-rt/test/profile/Inputs/instrprof-tls-exit.c b/compiler-rt/test/profile/Inputs/instrprof-tls-exit.c new file mode 100644 index 0000000000000..f7e6f78f019db --- /dev/null +++ b/compiler-rt/test/profile/Inputs/instrprof-tls-exit.c @@ -0,0 +1,37 @@ +#include <pthread.h> +#include <semaphore.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +void *exit_thread(void *arg_ptr) { + sem_t *s_p = (sem_t *)arg_ptr; + printf("Exit thread waiting...\n"); + if (sem_wait(s_p)) { + fprintf(stderr, "Failed to wait on signal from main thread\n"); + exit(EXIT_FAILURE); + } + printf("Exit thread activated\n"); + exit(0); + return NULL; +} + +int main() { + pthread_t exit; + sem_t s; + sem_init(&s, 0, 0); + if (pthread_create(&exit, NULL, exit_thread, &s) != 0) { + fprintf(stderr, "Failed to spawn exit thread\n"); + return EXIT_FAILURE; + } + if (sem_post(&s)) { + fprintf(stderr, "Failed to send signal to exit thread\n"); + return EXIT_FAILURE; + } + if (pthread_join(exit, NULL)) { + fprintf(stderr, "Failed to join exit thread\n"); + return EXIT_FAILURE; + } + fprintf(stderr, "Child thread should have called exit()\n"); + return EXIT_FAILURE; +} diff --git a/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-memfault.test 
b/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-memfault.test new file mode 100644 index 0000000000000..3974102090b4e --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-memfault.test @@ -0,0 +1,27 @@ +RUN: mkdir -p %t.tls.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tls.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.tls.d\" -DDLOPEN_FLAGS="RTLD_LAZY" -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tls -rpath %t.tls.d %S/../Inputs/instrprof-tls-dlclose-main.c + +RUN: mkdir -p %t.atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.atomic.d\" -DDLOPEN_FLAGS="RTLD_LAZY" -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic -rpath %t.atomic.d %S/../Inputs/instrprof-tls-dlclose-main.c + +RUN: env LLVM_PROFILE_FILE=%t-atomic.profraw %run %t-atomic +// Here we expect a segfault until the dlclose issue is fixed +RUN: env LLVM_PROFILE_FILE=%t-tls.profraw %run %t-tls + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-main.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-main.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: diff %t-main.tls.ll %t-main.atomic.ll + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: %clang_profuse=%t-tls.profdata 
-o %t-lib.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-lib.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-lib.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-lib.c +RUN: diff %t-lib.tls.ll %t-lib.atomic.ll + +XFAIL: target={{.*}} diff --git a/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-mix-subset.test b/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-mix-subset.test new file mode 100644 index 0000000000000..8cafef927ee7e --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-mix-subset.test @@ -0,0 +1,41 @@ +// Passing subset of combos where you still get coverage from modules +// which were opened with RTLD_NODELETE and later closed. +// +// These combos work because pthread_create is intercepted before +// it is first called. + +// All threadlocal + +RUN: mkdir -p %t.tls.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tls.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.tls.d\" -DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tls -rpath %t.tls.d %S/../Inputs/instrprof-tls-dlclose-main.c + +// All atomic + +RUN: mkdir -p %t.atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.atomic.d\" -DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic -rpath %t.atomic.d %S/../Inputs/instrprof-tls-dlclose-main.c + +// Atomic lib, threadlocal exe + +RUN: mkdir -p %t.atomic-tl.d +RUN: %clang_profgen -fcoverage-mapping 
-fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic-tl.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.atomic-tl.d\" -DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic-tl -rpath %t.atomic-tl.d %S/../Inputs/instrprof-tls-dlclose-main.c + +// Atomic exe, threadlocal lib: Not working. + +RUN: env LLVM_PROFILE_FILE=%t-atomic.profraw %run %t-atomic +RUN: env LLVM_PROFILE_FILE=%t-tls.profraw %run %t-tls +RUN: env LLVM_PROFILE_FILE=%t-atomic-tl.profraw %run %t-atomic-tl + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: llvm-profdata merge -o %t-atomic-tl.profdata %t-atomic-tl.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-main.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-main.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic-tl.profdata -o %t-main.atomic-tl.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: diff %t-main.tls.ll %t-main.atomic.ll +RUN: diff %t-main.atomic-tl.ll %t-main.atomic.ll diff --git a/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-mix.test b/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-mix.test new file mode 100644 index 0000000000000..74d5f9e2a4f58 --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-mix.test @@ -0,0 +1,48 @@ +// All threadlocal + +RUN: mkdir -p %t.tls.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tls.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.tls.d\" 
-DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tls -rpath %t.tls.d %S/../Inputs/instrprof-tls-dlclose-main.c + +// All atomic + +RUN: mkdir -p %t.atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.atomic.d\" -DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic -rpath %t.atomic.d %S/../Inputs/instrprof-tls-dlclose-main.c + +// Atomic lib, threadlocal exe + +RUN: mkdir -p %t.atomic-tl.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic-tl.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.atomic-tl.d\" -DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic-tl -rpath %t.atomic-tl.d %S/../Inputs/instrprof-tls-dlclose-main.c + +// Atomic exe, threadlocal lib + +RUN: mkdir -p %t.tl-atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tl-atomic.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.tl-atomic.d\" -DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tl-atomic -rpath %t.tl-atomic.d %S/../Inputs/instrprof-tls-dlclose-main.c + + +RUN: env LLVM_PROFILE_FILE=%t-atomic.profraw %run %t-atomic +RUN: 
env LLVM_PROFILE_FILE=%t-tls.profraw %run %t-tls +RUN: env LLVM_PROFILE_FILE=%t-atomic-tl.profraw %run %t-atomic-tl +RUN: env LLVM_PROFILE_FILE=%t-tl-atomic.profraw %run %t-tl-atomic + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: llvm-profdata merge -o %t-atomic-tl.profdata %t-atomic-tl.profraw +RUN: llvm-profdata merge -o %t-tl-atomic.profdata %t-tl-atomic.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-main.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-main.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic-tl.profdata -o %t-main.atomic-tl.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-tl-atomic.profdata -o %t-main.tl-atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: diff %t-main.tls.ll %t-main.atomic.ll +RUN: diff %t-main.atomic-tl.ll %t-main.atomic.ll +RUN: diff %t-main.tl-atomic.ll %t-main.atomic.ll + +// Atomic exe, threadlocal lib does not pass. 
+XFAIL: target={{.*}} diff --git a/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-nodelete.test b/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-nodelete.test new file mode 100644 index 0000000000000..8e99a3b60a69a --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-tls-dlclose-nodelete.test @@ -0,0 +1,24 @@ +RUN: mkdir -p %t.tls.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tls.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.tls.d\" -DDLOPEN_FLAGS="RTLD_NODELETE | RTLD_LAZY" -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tls -rpath %t.tls.d %S/../Inputs/instrprof-tls-dlclose-main.c + +RUN: mkdir -p %t.atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.atomic.d\" -DDLOPEN_FLAGS="RTLD_NODELETE | RTLD_LAZY" -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic -rpath %t.atomic.d %S/../Inputs/instrprof-tls-dlclose-main.c + +RUN: env LLVM_PROFILE_FILE=%t-atomic.profraw %run %t-atomic +RUN: env LLVM_PROFILE_FILE=%t-tls.profraw %run %t-tls + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-main.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-main.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: diff %t-main.tls.ll %t-main.atomic.ll + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o 
%t-atomic.profdata %t-atomic.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-lib.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-lib.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-lib.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-lib.c +RUN: diff %t-lib.tls.ll %t-lib.atomic.ll diff --git a/compiler-rt/test/profile/Linux/instrprof-tls-dlopen.test b/compiler-rt/test/profile/Linux/instrprof-tls-dlopen.test new file mode 100644 index 0000000000000..990c87e1bd86b --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-tls-dlopen.test @@ -0,0 +1,32 @@ +RUN: mkdir -p %t.tls.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tls.d/func.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlopen-func.c +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tls.d/func2.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlopen-func2.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.tls.d\" -DDLOPEN_FLAGS="RTLD_LAZY" -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tls -rpath %t.tls.d %S/../Inputs/instrprof-tls-dlopen-main.c + +RUN: mkdir -p %t.atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic.d/func.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlopen-func.c +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic.d/func2.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlopen-func2.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDLOPEN_FUNC_DIR=\"%t.atomic.d\" -DDLOPEN_FLAGS="RTLD_LAZY" -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic -rpath %t.atomic.d 
%S/../Inputs/instrprof-tls-dlopen-main.c + +RUN: env LLVM_PROFILE_FILE=%t-tls.profraw %run %t-tls +RUN: env LLVM_PROFILE_FILE=%t-atomic.profraw %run %t-atomic + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-main.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlopen-main.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-main.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlopen-main.c +RUN: diff %t-main.tls.ll %t-main.atomic.ll + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-func2.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlopen-func2.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-func2.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlopen-func2.c +RUN: diff %t-func2.tls.ll %t-func2.atomic.ll + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-func.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlopen-func.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-func.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlopen-func.c +RUN: diff %t-func.tls.ll %t-func.atomic.ll diff --git a/compiler-rt/test/profile/Linux/instrprof-tls-exit.test b/compiler-rt/test/profile/Linux/instrprof-tls-exit.test new file mode 100644 index 0000000000000..fef3c78f0726c --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-tls-exit.test @@ -0,0 +1,17 @@ +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic %S/../Inputs/instrprof-tls-exit.c + +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tls %S/../Inputs/instrprof-tls-exit.c + +RUN: env 
LLVM_PROFILE_FILE=%t-tls.profraw %run %t-tls +RUN: env LLVM_PROFILE_FILE=%t-atomic.profraw %run %t-atomic + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw + +RUN: %clang_profuse=%t-tls.profdata -o %t-tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-exit.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-exit.c +RUN: diff %t-tls.ll %t-atomic.ll + +# With the first iteration of this change, it is understood that only exiting via the main thread will cause +# expected coverage outputs. +XFAIL: target={{.*}} diff --git a/compiler-rt/test/profile/Linux/instrprof-tls-noclose-mix.test b/compiler-rt/test/profile/Linux/instrprof-tls-noclose-mix.test new file mode 100644 index 0000000000000..67cb1d2f66543 --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-tls-noclose-mix.test @@ -0,0 +1,51 @@ +// All threadlocal + +RUN: mkdir -p %t.tls.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tls.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDONT_CLOSE -DDLOPEN_FUNC_DIR=\"%t.tls.d\" -DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tls -rpath %t.tls.d %S/../Inputs/instrprof-tls-dlclose-main.c + +// All atomic + +RUN: mkdir -p %t.atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDONT_CLOSE -DDLOPEN_FUNC_DIR=\"%t.atomic.d\" -DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic -rpath 
%t.atomic.d %S/../Inputs/instrprof-tls-dlclose-main.c + +// Atomic lib, threadlocal exe + +RUN: mkdir -p %t.atomic-tl.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic-tl.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -DDONT_CLOSE -DDLOPEN_FUNC_DIR=\"%t.atomic-tl.d\" -DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic-tl -rpath %t.atomic-tl.d %S/../Inputs/instrprof-tls-dlclose-main.c + +// Atomic exe, threadlocal lib + +RUN: mkdir -p %t.tl-atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tl-atomic.d/lib.shared -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -g3 -lpthread -DDONT_CLOSE -DDLOPEN_FUNC_DIR=\"%t.tl-atomic.d\" -DDLOPEN_FLAGS="RTLD_LAZY | RTLD_NODELETE" -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tl-atomic -rpath %t.tl-atomic.d %S/../Inputs/instrprof-tls-dlclose-main.c + + +RUN: env LLVM_PROFILE_FILE=%t-atomic.profraw %run %t-atomic +RUN: env LLVM_PROFILE_FILE=%t-tls.profraw %run %t-tls +RUN: env LLVM_PROFILE_FILE=%t-atomic-tl.profraw %run %t-atomic-tl +RUN: env LLVM_PROFILE_FILE=%t-tl-atomic.profraw %run %t-tl-atomic + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: llvm-profdata merge -o %t-atomic-tl.profdata %t-atomic-tl.profraw +RUN: llvm-profdata merge -o %t-tl-atomic.profdata %t-tl-atomic.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-main.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-main.atomic.ll -S -emit-llvm 
%S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic-tl.profdata -o %t-main.atomic-tl.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-tl-atomic.profdata -o %t-main.tl-atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: diff %t-main.tls.ll %t-main.atomic.ll +RUN: diff %t-main.atomic-tl.ll %t-main.atomic.ll + +// The failure associated with threadlocal dlopened lib, atomic-update executable is that pthread_create +// is not intercepted before it is first called. That means that we can't run the thread exit handler. +RUN: diff %t-main.tl-atomic.ll %t-main.atomic.ll + +// Atomic exe, threadlocal lib does not pass. +XFAIL: target={{.*}} diff --git a/compiler-rt/test/profile/Linux/instrprof-tls-shared-mix-subset.test b/compiler-rt/test/profile/Linux/instrprof-tls-shared-mix-subset.test new file mode 100644 index 0000000000000..6d6b7e4b3bb59 --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-tls-shared-mix-subset.test @@ -0,0 +1,35 @@ +// All threadlocal + +RUN: mkdir -p %t.tls.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tls.d/liblib.so -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tls -rpath %t.tls.d -L%t.tls.d -llib %S/../Inputs/instrprof-tls-dlclose-main.c + +// All atomic + +RUN: mkdir -p %t.atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic.d/liblib.so -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic -rpath %t.atomic.d -L%t.atomic.d -llib 
%S/../Inputs/instrprof-tls-dlclose-main.c + +// Atomic lib, threadlocal exe + +RUN: mkdir -p %t.atomic-tl.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic-tl.d/liblib.so -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic-tl -rpath %t.atomic-tl.d -L%t.atomic-tl.d -llib %S/../Inputs/instrprof-tls-dlclose-main.c + +// Atomic exe, threadlocal lib + +RUN: env LLVM_PROFILE_FILE=%t-atomic.profraw %run %t-atomic +RUN: env LLVM_PROFILE_FILE=%t-tls.profraw %run %t-tls +RUN: env LLVM_PROFILE_FILE=%t-atomic-tl.profraw %run %t-atomic-tl + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: llvm-profdata merge -o %t-atomic-tl.profdata %t-atomic-tl.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-main.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-main.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic-tl.profdata -o %t-main.atomic-tl.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: diff %t-main.tls.ll %t-main.atomic.ll +RUN: diff %t-main.atomic-tl.ll %t-main.atomic.ll diff --git a/compiler-rt/test/profile/Linux/instrprof-tls-shared-mix.test b/compiler-rt/test/profile/Linux/instrprof-tls-shared-mix.test new file mode 100644 index 0000000000000..1fb58128ada32 --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-tls-shared-mix.test @@ -0,0 +1,48 @@ +// All threadlocal + +RUN: mkdir -p %t.tls.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tls.d/liblib.so -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: 
%clang_profgen -fcoverage-mapping -lpthread -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tls -rpath %t.tls.d -L%t.tls.d -llib %S/../Inputs/instrprof-tls-dlclose-main.c + +// All atomic + +RUN: mkdir -p %t.atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic.d/liblib.so -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic -rpath %t.atomic.d -L%t.atomic.d -llib %S/../Inputs/instrprof-tls-dlclose-main.c + +// Atomic lib, threadlocal exe + +RUN: mkdir -p %t.atomic-tl.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.atomic-tl.d/liblib.so -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-atomic-tl -rpath %t.atomic-tl.d -L%t.atomic-tl.d -llib %S/../Inputs/instrprof-tls-dlclose-main.c + +// Atomic exe, threadlocal lib + +RUN: mkdir -p %t.tl-atomic.d +RUN: %clang_profgen -fcoverage-mapping -fprofile-thread-local -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t.tl-atomic.d/liblib.so -fPIC -shared %S/../Inputs/instrprof-tls-dlclose-lib.c + +RUN: %clang_profgen -fcoverage-mapping -lpthread -fprofile-update=atomic -fdata-sections -ffunction-sections -fuse-ld=gold -Wl,--gc-sections -o %t-tl-atomic -rpath %t.tl-atomic.d -L%t.tl-atomic.d -llib %S/../Inputs/instrprof-tls-dlclose-main.c + + +RUN: env LLVM_PROFILE_FILE=%t-atomic.profraw %run %t-atomic +RUN: env LLVM_PROFILE_FILE=%t-tls.profraw %run %t-tls +RUN: env LLVM_PROFILE_FILE=%t-atomic-tl.profraw %run %t-atomic-tl +RUN: env 
LLVM_PROFILE_FILE=%t-tl-atomic.profraw %run %t-tl-atomic + +RUN: llvm-profdata merge -o %t-tls.profdata %t-tls.profraw +RUN: llvm-profdata merge -o %t-atomic.profdata %t-atomic.profraw +RUN: llvm-profdata merge -o %t-atomic-tl.profdata %t-atomic-tl.profraw +RUN: llvm-profdata merge -o %t-tl-atomic.profdata %t-tl-atomic.profraw +RUN: %clang_profuse=%t-tls.profdata -o %t-main.tls.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic.profdata -o %t-main.atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-atomic-tl.profdata -o %t-main.atomic-tl.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: %clang_profuse=%t-tl-atomic.profdata -o %t-main.tl-atomic.ll -S -emit-llvm %S/../Inputs/instrprof-tls-dlclose-main.c +RUN: diff %t-main.tls.ll %t-main.atomic.ll +RUN: diff %t-main.atomic-tl.ll %t-main.atomic.ll +RUN: diff %t-main.tl-atomic.ll %t-main.atomic.ll + +// Atomic exe, threadlocal lib does not pass. +XFAIL: target={{.*}} diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 817ad9550f652..0c78450641db6 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -100,6 +100,9 @@ inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } /// Return the name prefix of profile counter variables. inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } +/// Return the name prefix of thread-local profile counter variables. +inline StringRef getInstrProfCountersTLSVarPrefix() { return "__profc_tls_"; } + /// Return the name prefix of profile bitmap variables. 
inline StringRef getInstrProfBitmapVarPrefix() { return "__profbm_"; } diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index e9866d94b762c..8655bcf498437 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -312,6 +312,9 @@ INSTR_PROF_SECT_ENTRY(IPSK_data, \ INSTR_PROF_SECT_ENTRY(IPSK_cnts, \ INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \ INSTR_PROF_CNTS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_tls_cnts, \ + INSTR_PROF_QUOTE(INSTR_PROF_TLS_CNTS_COMMON), \ + INSTR_PROF_CNTS_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \ INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON), \ INSTR_PROF_BITS_COFF, "__DATA,") @@ -750,6 +753,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_NAME_COMMON __llvm_prf_names #define INSTR_PROF_VNAME_COMMON __llvm_prf_vns #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts +#define INSTR_PROF_TLS_CNTS_COMMON __llvm_tls_prf_cnts #define INSTR_PROF_BITS_COMMON __llvm_prf_bits #define INSTR_PROF_VALS_COMMON __llvm_prf_vals #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index f9b58d9f27821..d4005350e84ca 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -85,6 +85,11 @@ cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate( "Use debug info to correlate"), clEnumValN(InstrProfCorrelator::BINARY, "binary", "Use binary to correlate"))); + +cl::opt<bool> + InstrProfThreadLocal("instr-prof-thread-local", + cl::desc("Generate thread local counter regions"), + cl::init(false)); } // namespace llvm namespace { @@ -215,6 +220,10 @@ class InstrLowerer final { struct PerFunctionProfileData { uint32_t NumValueSites[IPVK_Last + 1] = {}; GlobalVariable *RegionCounters = nullptr; + 
GlobalVariable *TLSRegionCounters = nullptr; + // Both a regular DataVar and a TLSDataVar must exist when TLS counters are + // in use. + GlobalVariable *TLSDataVar = nullptr; GlobalVariable *DataVar = nullptr; GlobalVariable *RegionBitmaps = nullptr; uint32_t NumBitmapBytes = 0; @@ -286,16 +295,24 @@ class InstrLowerer final { /// acts on. Value *getCounterAddress(InstrProfCntrInstBase *I); + Value *getThreadLocalCounterAddress(InstrProfCntrInstBase *I); + /// Get the region counters for an increment, creating them if necessary. /// /// If the counter array doesn't yet exist, the profile data variables /// referring to them will also be created. GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc); + /// Get the thread local region counters, creating them if necessary. + /// These must exist alongside the global region counters. + GlobalVariable * + getOrCreateThreadLocalRegionCounters(InstrProfCntrInstBase *Inc); + /// Create the region counters. GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, - GlobalValue::LinkageTypes Linkage); + GlobalValue::LinkageTypes Linkage, + bool ThreadLocal); /// Compute the address of the test vector bitmap that this profiling /// instruction acts on. 
@@ -608,6 +625,7 @@ enum class ValueProfilingCallType { } // end anonymous namespace +// TODO: put TLS counters incompatibility checks here PreservedAnalyses InstrProfilingLoweringPass::run(Module &M, ModuleAnalysisManager &AM) { FunctionAnalysisManager &FAM = @@ -894,6 +912,9 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { auto *Counters = getOrCreateRegionCounters(I); + if (InstrProfThreadLocal) { + return getThreadLocalCounterAddress(I); + } IRBuilder<> Builder(I); if (isa<InstrProfTimestampInst>(I)) @@ -932,6 +953,22 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { return Builder.CreateIntToPtr(Add, Addr->getType()); } +Value *InstrLowerer::getThreadLocalCounterAddress(InstrProfCntrInstBase *I) { + GlobalVariable *CountersTLS = getOrCreateThreadLocalRegionCounters(I); + IRBuilder<> Builder(I); + + if (isa<InstrProfTimestampInst>(I)) + CountersTLS->setAlignment(Align(8)); + + auto *Addr = Builder.CreateConstInBoundsGEP2_32( + CountersTLS->getValueType(), + Builder.CreateThreadLocalAddress(CountersTLS), 0, + I->getIndex()->getZExtValue()); + + assert(!isRuntimeCounterRelocationEnabled()); + return Addr; +} + Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { auto *Bitmaps = getOrCreateRegionBitmaps(I); IRBuilder<> Builder(I); @@ -1391,13 +1428,18 @@ GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc, VarPrefix = getInstrProfCountersVarPrefix(); VarName = getVarName(Inc, VarPrefix, Renamed); InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Inc); - Ptr = createRegionCounters(CntrIncrement, VarName, Linkage); + Ptr = createRegionCounters(CntrIncrement, VarName, Linkage, false); } else if (IPSK == IPSK_bitmap) { VarPrefix = getInstrProfBitmapVarPrefix(); VarName = getVarName(Inc, VarPrefix, Renamed); InstrProfMCDCBitmapInstBase *BitmapUpdate = dyn_cast<InstrProfMCDCBitmapInstBase>(Inc); 
Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage); + } else if (IPSK == IPSK_tls_cnts) { + VarPrefix = getInstrProfCountersTLSVarPrefix(); + VarName = getVarName(Inc, VarPrefix, Renamed); + InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Inc); + Ptr = createRegionCounters(CntrIncrement, VarName, Linkage, true); } else { llvm_unreachable("Profile Section must be for Counters or Bitmaps"); } @@ -1440,7 +1482,8 @@ InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) { GlobalVariable * InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, - GlobalValue::LinkageTypes Linkage) { + GlobalValue::LinkageTypes Linkage, + bool ThreadLocal) { uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); auto &Ctx = M.getContext(); GlobalVariable *GV; @@ -1460,6 +1503,7 @@ InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, Constant::getNullValue(CounterTy), Name); GV->setAlignment(Align(8)); } + GV->setThreadLocal(ThreadLocal); return GV; } @@ -1475,6 +1519,10 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) { auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts); PD.RegionCounters = CounterPtr; + if (InstrProfThreadLocal) { + PD.TLSRegionCounters = setupProfileSection(Inc, IPSK_tls_cnts); + } + if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) { LLVMContext &Ctx = M.getContext(); @@ -1518,6 +1566,21 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) { return PD.RegionCounters; } +GlobalVariable * +InstrLowerer::getOrCreateThreadLocalRegionCounters(InstrProfCntrInstBase *Inc) { + // If this check fails, this function would return a null pointer + assert(InstrProfThreadLocal); + GlobalVariable *NamePtr = Inc->getName(); + auto &PD = ProfileDataMap[NamePtr]; + if (PD.TLSRegionCounters) { + return PD.TLSRegionCounters; + } else { + // Initializes TLSRegionCounters when InstrProfThreadLocal is true + 
(void)getOrCreateRegionCounters(Inc); + return PD.TLSRegionCounters; + } +} + void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) { // When debug information is correlated to profile data, a data variable // is not needed. @@ -1555,6 +1618,8 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) { getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed); std::string DataVarName = getVarName(Inc, getInstrProfDataVarPrefix(), Renamed); + std::string TLSDataVarName = + getVarName(Inc, getInstrProfCountersTLSVarPrefix(), Renamed); auto *Int8PtrTy = PointerType::getUnqual(Ctx); // Allocate statically the array of pointers to value profile nodes for _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits