llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-driver Author: Andrew Wock (ajwock) <details> <summary>Changes</summary> LLVM can now generate increments to counters in thread local storage. Use a new compiler-rt runtime to atomically add thread local counters to global counters on thread exit. The clang driver will link the new runtime libraries in when the new option -fprofile-thread-local is specified. More details available in the RFC on discourse. --- Patch is 67.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95494.diff 36 Files Affected: - (modified) clang/docs/UsersManual.rst (+8) - (modified) clang/include/clang/Basic/CodeGenOptions.def (+1) - (modified) clang/include/clang/Driver/Options.td (+3) - (modified) clang/include/clang/Driver/ToolChain.h (+6) - (modified) clang/lib/Driver/ToolChain.cpp (+10) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+12) - (modified) clang/lib/Driver/ToolChains/Linux.cpp (+7) - (modified) compiler-rt/include/profile/InstrProfData.inc (+4) - (modified) compiler-rt/lib/profile/CMakeLists.txt (+35) - (added) compiler-rt/lib/profile/InstrProfilingDyLibLinux.cpp (+63) - (modified) compiler-rt/lib/profile/InstrProfilingFile.c (+6) - (modified) compiler-rt/lib/profile/InstrProfilingPlatformLinux.c (+1) - (added) compiler-rt/lib/profile/InstrProfilingStaticTLSLinux.cpp (+123) - (added) compiler-rt/lib/profile/InstrProfilingTLS.c (+29) - (added) compiler-rt/lib/profile/InstrProfilingTLS.h (+39) - (added) compiler-rt/lib/profile/InstrProfilingTLSDyLib.c (+100) - (added) compiler-rt/lib/profile/InstrProfilingTLSDyLib.h (+4) - (modified) compiler-rt/lib/tsan/rtl/CMakeLists.txt (+1-1) - (added) compiler-rt/test/profile/Inputs/instrprof-tls-dlclose-lib.c (+7) - (added) compiler-rt/test/profile/Inputs/instrprof-tls-dlclose-main.c (+93) - (added) compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-func.c (+9) - (added) compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-func2.c 
(+9) - (added) compiler-rt/test/profile/Inputs/instrprof-tls-dlopen-main.c (+105) - (added) compiler-rt/test/profile/Inputs/instrprof-tls-exit.c (+37) - (added) compiler-rt/test/profile/Linux/instrprof-tls-dlclose-memfault.test (+27) - (added) compiler-rt/test/profile/Linux/instrprof-tls-dlclose-mix-subset.test (+41) - (added) compiler-rt/test/profile/Linux/instrprof-tls-dlclose-mix.test (+48) - (added) compiler-rt/test/profile/Linux/instrprof-tls-dlclose-nodelete.test (+24) - (added) compiler-rt/test/profile/Linux/instrprof-tls-dlopen.test (+32) - (added) compiler-rt/test/profile/Linux/instrprof-tls-exit.test (+17) - (added) compiler-rt/test/profile/Linux/instrprof-tls-noclose-mix.test (+51) - (added) compiler-rt/test/profile/Linux/instrprof-tls-shared-mix-subset.test (+35) - (added) compiler-rt/test/profile/Linux/instrprof-tls-shared-mix.test (+48) - (modified) llvm/include/llvm/ProfileData/InstrProf.h (+3) - (modified) llvm/include/llvm/ProfileData/InstrProfData.inc (+4) - (modified) llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp (+68-3) ``````````diff diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index f954857b0235a..f7db513b92909 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2932,6 +2932,14 @@ indexed format, regardeless whether it is produced by frontend or the IR pass. overhead. ``prefer-atomic`` will be transformed to ``atomic`` when supported by the target, or ``single`` otherwise. +.. option:: -fprofile-thread-local + + Increment profile counters in thread local storage and atomically add their + values to global counters on thread exit. This has the potential to deliver + both accuracy and high performance whenever there is high thread contention + on profile counters. This is an experimental option and it is only supported + on 64-bit linux. 
+ Fine Tuning Profile Collection ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 7ffc40a00504f..7cd0bfb6d71b5 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -218,6 +218,7 @@ ENUM_CODEGENOPT(ProfileUse, ProfileInstrKind, 2, ProfileNone) /// instrumented. Selected group numbers can be 0 to N-1 inclusive. VALUE_CODEGENOPT(ProfileTotalFunctionGroups, 32, 1) VALUE_CODEGENOPT(ProfileSelectedFunctionGroup, 32, 0) +CODEGENOPT(InstrProfileThreadLocal, 1, 0) ///< Counters are updated on a per-thread basis CODEGENOPT(CoverageMapping , 1, 0) ///< Generate coverage mapping regions to ///< enable code coverage analysis. CODEGENOPT(DumpCoverageMapping , 1, 0) ///< Dump the generated coverage mapping diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d44faa55c456f..aab5b63c991f1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1768,6 +1768,9 @@ def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">, def fprofile_instr_generate_EQ : Joined<["-"], "fprofile-instr-generate=">, Group<f_Group>, Visibility<[ClangOption, CLOption]>, MetaVarName<"<file>">, HelpText<"Generate instrumented code to collect execution counts into <file> (overridden by LLVM_PROFILE_FILE env var)">; +def fprofile_thread_local : Flag<["-"], "fprofile-thread-local">, + Group<f_Group>, Visibility<[ClangOption, CLOption]>, + HelpText<"Generage profile counters in thread local storage">; def fprofile_instr_use : Flag<["-"], "fprofile-instr-use">, Group<f_Group>, Visibility<[ClangOption, CLOption]>; def fprofile_instr_use_EQ : Joined<["-"], "fprofile-instr-use=">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 9789cfacafd78..162c730782afb 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ 
b/clang/include/clang/Driver/ToolChain.h @@ -752,6 +752,12 @@ class ToolChain { virtual void addProfileRTLibs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + /// addThreadLocalProfileRTLibs - With -fprofile-thread-local, add the + /// threadlocal profile runtime static + shared library pair. + virtual void + addThreadLocalProfileRTLibs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + /// Add arguments to use system-specific CUDA includes. virtual void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 40ab2e91125d1..4708cb7df5044 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1078,6 +1078,16 @@ void ToolChain::addProfileRTLibs(const llvm::opt::ArgList &Args, CmdArgs.push_back(getCompilerRTArgString(Args, "profile")); } +void ToolChain::addThreadLocalProfileRTLibs( + const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const { + if (needsProfileRT(Args) && Args.hasArg(options::OPT_fprofile_thread_local)) { + // Static first, so we can specify '-u' where needed + CmdArgs.push_back(getCompilerRTArgString(Args, "profile_threadlocal")); + CmdArgs.push_back(getCompilerRTArgString(Args, "profile_threadlocal", + ToolChain::FT_Shared)); + } +} + ToolChain::RuntimeLibType ToolChain::GetRuntimeLibType( const ArgList &Args) const { if (runtimeLibType) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b8d8ff3db5d1f..cd63ac56fecf6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -720,6 +720,18 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, CmdArgs.push_back("-fcoverage-mcdc"); } + if (Args.hasArg(options::OPT_fprofile_thread_local)) { + if (!ProfileGenerateArg) + D.Diag(clang::diag::err_drv_argument_only_allowed_with) + << 
"-fprofile-thread-local" + << "-fprofile-instr-generate"; + + // Clang cc1 is not in the know about thread local coverage, but llvm + // should be + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-instr-prof-thread-local"); + } + if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ, options::OPT_fcoverage_compilation_dir_EQ)) { if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ)) diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 2222dea431c3c..0a889f957786a 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -843,6 +843,13 @@ void Linux::addProfileRTLibs(const llvm::opt::ArgList &Args, CmdArgs.push_back(Args.MakeArgString( Twine("-u", llvm::getInstrProfRuntimeHookVarName()))); ToolChain::addProfileRTLibs(Args, CmdArgs); + + if (needsProfileRT(Args) && Args.hasArg(options::OPT_fprofile_thread_local)) { + CmdArgs.push_back(Args.MakeArgString(Twine( + "-u", + llvm::StringRef("__llvm_profile_tls_register_thread_exit_handler")))); + } + ToolChain::addThreadLocalProfileRTLibs(Args, CmdArgs); } void Linux::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index e9866d94b762c..8655bcf498437 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -312,6 +312,9 @@ INSTR_PROF_SECT_ENTRY(IPSK_data, \ INSTR_PROF_SECT_ENTRY(IPSK_cnts, \ INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \ INSTR_PROF_CNTS_COFF, "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_tls_cnts, \ + INSTR_PROF_QUOTE(INSTR_PROF_TLS_CNTS_COMMON), \ + INSTR_PROF_CNTS_COFF, "__DATA,") INSTR_PROF_SECT_ENTRY(IPSK_bitmap, \ INSTR_PROF_QUOTE(INSTR_PROF_BITS_COMMON), \ INSTR_PROF_BITS_COFF, "__DATA,") @@ -750,6 +753,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_NAME_COMMON __llvm_prf_names #define 
INSTR_PROF_VNAME_COMMON __llvm_prf_vns #define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts +#define INSTR_PROF_TLS_CNTS_COMMON __llvm_tls_prf_cnts #define INSTR_PROF_BITS_COMMON __llvm_prf_bits #define INSTR_PROF_VALS_COMMON __llvm_prf_vals #define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt index 45e5164891751..b9f3a20bb328d 100644 --- a/compiler-rt/lib/profile/CMakeLists.txt +++ b/compiler-rt/lib/profile/CMakeLists.txt @@ -70,14 +70,25 @@ set(PROFILE_SOURCES InstrProfilingUtil.c ) +set(PROFILE_STATIC_TLS_SOURCES + InstrProfilingTLS.c + InstrProfilingStaticTLSLinux.cpp) + +set(PROFILE_SHARED_TLS_SOURCES + InstrProfilingTLSDyLib.c + InstrProfilingDyLibLinux.cpp) + set(PROFILE_HEADERS InstrProfiling.h InstrProfilingInternal.h InstrProfilingPort.h InstrProfilingUtil.h + InstrProfilingTLS.h WindowsMMap.h ) +set(PROFILE_LINK_LIBS ${SANITIZER_COMMON_LINK_LIBS}) + if(WIN32) list(APPEND PROFILE_SOURCES WindowsMMap.c @@ -134,6 +145,30 @@ if(APPLE) ADDITIONAL_HEADERS ${PROFILE_HEADERS} PARENT_TARGET profile) else() + #if(UNIX AND NOT APPLE AND NOT ANDROID) + if(OS_NAME MATCHES "Linux") + add_compiler_rt_runtime(clang_rt.profile_threadlocal + STATIC + OS ${PROFILE_SUPPORTED_OS} + ARCHS ${PROFILE_SUPPORTED_ARCH} + CFLAGS ${EXTRA_FLAGS} + SOURCES ${PROFILE_STATIC_TLS_SOURCES} + ADDITIONAL_HEADERS ${PROFILE_HEADERS} + PARENT_TARGET profile) + + add_compiler_rt_runtime(clang_rt.profile_threadlocal + SHARED + OS ${PROFILE_SUPPORTED_OS} + ARCHS ${PROFILE_SUPPORTED_ARCH} + CFLAGS ${EXTRA_FLAGS} + SOURCES ${PROFILE_SHARED_TLS_SOURCES} + ADDITIONAL_HEADERS ${PROFILE_HEADERS} + OBJECT_LIBS RTInterception + RTSanitizerCommon + RTSanitizerCommonLibc + PARENT_TARGET profile) + endif() + add_compiler_rt_runtime(clang_rt.profile STATIC ARCHS ${PROFILE_SUPPORTED_ARCH} diff --git a/compiler-rt/lib/profile/InstrProfilingDyLibLinux.cpp b/compiler-rt/lib/profile/InstrProfilingDyLibLinux.cpp new file mode 100644 
index 0000000000000..47f2baa6a5815 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingDyLibLinux.cpp @@ -0,0 +1,63 @@ +#if defined(__linux__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + (defined(__sun__) && defined(__svr4__)) || defined(__NetBSD__) || \ + defined(_AIX) + +#include <elf.h> +#include <link.h> +#endif +#include <stdlib.h> +#include <string.h> + +extern "C" { + +#include "InstrProfiling.h" +#include "InstrProfilingInternal.h" +#include "InstrProfilingTLS.h" +#include "InstrProfilingTLSDyLib.h" +} + +#include "interception/interception.h" + +extern "C" { + +struct pthread_wrapper_arg { + void *(*fn)(void *); + void *arg; + uint32_t arg_keepalive; +}; + +void *pthread_fn_wrapper(void *arg_ptr) { + struct pthread_wrapper_arg *wrapper_arg = + (struct pthread_wrapper_arg *)arg_ptr; + void *(*fn)(void *) = __atomic_load_n(&wrapper_arg->fn, __ATOMIC_RELAXED); + void *arg = __atomic_load_n(&wrapper_arg->arg, __ATOMIC_RELAXED); + __atomic_store_n(&wrapper_arg->arg_keepalive, 0, __ATOMIC_RELEASE); + + // startup + // Do nothing (TLS is automatically loaded and zeroed) + void *retval = fn(arg); + // cleanup + run_thread_exit_handlers(); + // Combine counters with main counters + return retval; +} + +void __llvm_register_profile_intercepts() { register_profile_intercepts(); } + +} // end extern "C" + +INTERCEPTOR(int, pthread_create, void *thread, void *attr, + void *(*start_routine)(void *), void *arg) { + int res = -1; + struct pthread_wrapper_arg wrapper_arg = {(void *(*)(void *))start_routine, + arg, 1}; + + // do pthread + res = REAL(pthread_create)(thread, attr, pthread_fn_wrapper, &wrapper_arg); + // Spin wait for child thread to copy arguments + while (__atomic_load_n(&wrapper_arg.arg_keepalive, __ATOMIC_ACQUIRE) == 1) + ; + return res; +} + +void register_profile_intercepts() { INTERCEPT_FUNCTION(pthread_create); } diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c index 
e4d99ef4872bd..64775f24fd83c 100644 --- a/compiler-rt/lib/profile/InstrProfilingFile.c +++ b/compiler-rt/lib/profile/InstrProfilingFile.c @@ -34,6 +34,7 @@ #include "InstrProfiling.h" #include "InstrProfilingInternal.h" #include "InstrProfilingPort.h" +#include "InstrProfilingTLS.h" #include "InstrProfilingUtil.h" /* From where is profile name specified. @@ -1084,6 +1085,8 @@ void __llvm_profile_set_filename(const char *FilenamePat) { parseAndSetFilename(FilenamePat, PNS_runtime_api, 1); } +void (*on_main_thread_exit)(void) = NULL; + /* The public API for writing profile data into the file with name * set by previous calls to __llvm_profile_set_filename or * __llvm_profile_override_default_filename or @@ -1097,6 +1100,9 @@ int __llvm_profile_write_file(void) { // Temporarily suspend getting SIGKILL when the parent exits. int PDeathSig = lprofSuspendSigKill(); + if (on_main_thread_exit) + on_main_thread_exit(); + if (lprofProfileDumped() || __llvm_profile_is_continuous_mode_enabled()) { PROF_NOTE("Profile data not written to file: %s.\n", "already written"); if (PDeathSig == 1) diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index b766436497b74..4f96523a56a37 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -45,6 +45,7 @@ extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; + extern VTableProfData PROF_VTABLE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern VTableProfData PROF_VTABLE_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; extern char PROF_VNAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; diff --git a/compiler-rt/lib/profile/InstrProfilingStaticTLSLinux.cpp b/compiler-rt/lib/profile/InstrProfilingStaticTLSLinux.cpp new file mode 100644 
index 0000000000000..fc5f785e1ab40 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingStaticTLSLinux.cpp @@ -0,0 +1,123 @@ +#if defined(__linux__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + (defined(__sun__) && defined(__svr4__)) || defined(__NetBSD__) || \ + defined(_AIX) + +#include <elf.h> +#include <link.h> +#endif +#include <stdlib.h> +#include <string.h> + +extern "C" { + +#include "InstrProfiling.h" +#include "InstrProfilingInternal.h" +#include "InstrProfilingTLS.h" +} + +extern "C" { + +#define PROF_TLS_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_TLS_CNTS_COMMON) +#define PROF_TLS_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_TLS_CNTS_COMMON) + +extern char PROF_TLS_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; +extern char PROF_TLS_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK; + +COMPILER_RT_VISIBILITY char *__llvm_profile_begin_tls_counters(void) { + return &PROF_TLS_CNTS_START; +} +COMPILER_RT_VISIBILITY char *__llvm_profile_end_tls_counters(void) { + return &PROF_TLS_CNTS_STOP; +} + +struct finalization_data { + char *mod_begin; + char *tls_img_begin; + char *tls_img_end; + char *cnts_begin; + char *cnts_end; +}; + +// This is O(num_modules + num_counters) unfortunately. If there were a +// mechanism to calculate the thread-local start of a thread-local section like +// there is a mechanism to calculate the static start of a static section (i.e. +// __start_$sectionname), that would simplify implementation a lot and make this +// just O(num_counters). 
+static int FindAndAddCounters_cb(struct dl_phdr_info *info, size_t size, + void *data) { + finalization_data *fdata = (finalization_data *)data; + char *mod_begin = fdata->mod_begin; + // We're looking for a match to the dladdr calculated based on PROF_CNTS_START + if (mod_begin != (char *)info->dlpi_addr) { + return 0; + } + + if (info->dlpi_tls_data == NULL) { + return 1; + } + + const Elf64_Phdr *hdr = info->dlpi_phdr; + const Elf64_Phdr *last_hdr = hdr + info->dlpi_phnum; + + const Elf64_Phdr *tls_hdr; + for (; hdr != last_hdr; ++hdr) { + if (hdr->p_type == PT_TLS) { + tls_hdr = hdr; + goto found_tls_ph; + } + } + return 1; +found_tls_ph: + uint64_t num_counters = + __llvm_profile_get_num_counters(fdata->tls_img_begin, fdata->tls_img_end); + uint64_t counter_size = __llvm_profile_counter_entry_size(); + + // Calculate the offset of __llvm_prf_tls_cnts into the tls block for this + // module. The addresses in use below correspond to the tls initialization + // image, which is statically allocated for the module, rather than the TLS + // block itself. + uint64_t ph_true_vaddr = + (uint64_t)info->dlpi_addr + (uint64_t)tls_hdr->p_vaddr; + uint64_t tls_cnts_tlsblk_offset = + (uint64_t)fdata->tls_img_begin - ph_true_vaddr; + + // Calculate the thread local copy of __llvm_prf_tls_cnts for this module. + uint64_t tls_prf_cnts_modlocal_begin = + (uint64_t)info->dlpi_tls_data + tls_cnts_tlsblk_offset; + + // We don't support single byte counters because they are also resilient to + // thread synchronization issues and they are designed to avoid memory + // overhead, which is the opposite of what TL counters do. + // TODO: warn? 
+ if (counter_size == sizeof(uint64_t)) { + uint64_t *tls_cnt = (uint64_t *)tls_prf_cnts_modlocal_begin; + uint64_t *tls_end = (uint64_t *)tls_cnt + num_counters; + uint64_t *cnt = (uint64_t *)fdata->cnts_begin; + for (; tls_cnt != tls_end; tls_cnt++, cnt++) { + __atomic_fetch_add(cnt, *tls_cnt, __ATOMIC_RELAXED); + } + } + return 1; +} + +COMPILER_RT_VISIBILITY +void __llvm_profile_tls_counters_finalize(void) { + struct finalization_data fdata = {0}; + fdata.tls_img_begin = __llvm_profile_begin_tls_counters(); + fdata.tls_img_end = __llvm_profile_end_tls_counters(); + fdata.cnts_begin = __llvm_profile_begin_counters(); + fdata.cnts_end = __llvm_profile_end_counters(); + + if (!fdata.tls_img_begin || !fdata.tls_img_end || !fdata.cnts_begin || + !fdata.cnts_end) { + return; + } + + Dl_info info; + if (dladdr(fdata.cnts_begin, &info) == 0) { + return; + } + fdata.mod_begin = (char *)info.dli_fbase; + dl_iterate_phdr(FindAndAddCounters_cb, &fdata); +} +} diff --git a/compiler-rt/lib/profile/InstrProfilingTLS.c b/compiler-rt/lib/profile/InstrProfilingTLS.c new file mode 100644 index 0000000000000..029ed9e542e5a --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingTLS.c @@ -0,0 +1,29 @@ +#include "InstrProfilingTLS.h" +#include "InstrProfiling.h" + +struct texit_fn_node module_node COMPILER_RT_VISIBILITY; + +// We act as a shim between the profile_threadlocal sharedlib +// and the profile static lib. We need to tell the static lib +// to add all of the counters up on main thread exit, but the +// shared lib is the one who knows how to do that and whether it's +// already been done. +// +// In the constructor we pass flush_main_thread_counters from the +// sharedlib to the non-tls statlib's on_main_thread_exit fnptr. 
+extern void flush_main_thread_counters(void); +extern void (*on_main_thread_exit)(void); + +__attribute__((constructor)) COMPILER_RT_VISIBILITY void +__llvm_profile_tls_register_thread_exit_handler(void) { + module_node.prev = NULL; + module_node.next = NULL; + module_node.fn = __llvm_profile_tls_counters_finalize; + register_tls_prfcnts_module_thread_exit_handler(&module_node); + if (!on_main_thread_exit) { + on_main_thread_exit = flush_main_thread_counters; + } +} + +// TODO: Add destructor +// (But not yet, I'm scared) diff --git a/compiler-rt/lib/profile/InstrProfilingTLS.h b/compiler-rt/lib/profile/InstrProfilingTLS.h new file mode 100644 index 0000000000000..1b6001d27d375 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingTLS.h @@ -0,0 +1,39 @@ +#ifndef INSTR_PROFILING_TLS_H +#define INSTR_PROFILING_TLS_H + +char *__llvm_profile_begin_tls_counters(void); +char *__llvm_profile_end_tls_counters(void); + +/*! + * \brief Add counter values from TLS to the global counters for the program + * + * On thread exit, atomically add the values in TLS counters to the static + * counters for the whole process. + */ +void __llvm_profile_tls_counters_finalize(void); + +/* + * Dylib stuff + */ +typedef void (*texit_fnc)(void); + +typedef struct texit_fn_node { + struct texit_fn_node *prev; + texit_fnc fn; + struct texit_fn_node *next; +} texit_fn_node; + +// TODO: really this should be write-preferring rwlocked +struct texit_fn_registry { + int texit_mtx; + texit_fn_node head; + texit_fn_node tail; +}; + +void register_tls_prfcnts_module_thread_exit_handler(texit_fn_node *new_node); +void unregister_tls_prfcnts_module_thread_exit_handler(texit_fn_node *new_node); +void run_thread_exit_handlers(void); + +void register_profile... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/95494 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits