From: Paolo Bonzini <pbonz...@redhat.com> Reviewed-by: Mike Day <ncm...@ncultra.org> --- configure | 63 ++++++++++++++++++++++---- include/qemu/tls.h | 127 +++++++++++++++++++++++++++++++++++++++++++++-------- include/qom/cpu.h | 2 +- tests/Makefile | 2 +- tests/test-tls.c | 87 ++++++++++++++++++++++++++++++++++++ 5 files changed, 252 insertions(+), 29 deletions(-) create mode 100644 tests/test-tls.c
diff --git a/configure b/configure index 18fa608..baf61c8 100755 --- a/configure +++ b/configure @@ -285,6 +285,7 @@ fi ar="${AR-${cross_prefix}ar}" as="${AS-${cross_prefix}as}" cpp="${CPP-$cc -E}" +nm="${NM-${cross_prefix}nm}" objcopy="${OBJCOPY-${cross_prefix}objcopy}" ld="${LD-${cross_prefix}ld}" libtool="${LIBTOOL-${cross_prefix}libtool}" @@ -1241,6 +1242,29 @@ if compile_prog "-Werror -fno-gcse" "" ; then TRANSLATE_OPT_CFLAGS=-fno-gcse fi +########################################## +# Using __thread is either faster than pthread_get/setspecific, +# or (if using GCC's "emutls" feature) exactly the same. So +# we always use it if available. + +cat > $TMPC << EOF +__thread int x; + +int main(void) +{ + x = 42; + return x; +} +EOF +if compile_prog "-Werror" "" ; then + tls=yes +else + tls=no +fi + +########################################## +# Position Independent executables + if test "$static" = "yes" ; then if test "$pie" = "yes" ; then error_exit "static and pie are mutually incompatible" @@ -1260,19 +1284,18 @@ if test "$pie" = ""; then fi if test "$pie" != "no" ; then + if test "$tls" = yes; then + THREAD=__thread + else + THREAD= + fi cat > $TMPC << EOF - -#ifdef __linux__ -# define THREAD __thread -#else -# define THREAD -#endif - -static THREAD int tls_var; +static $THREAD int tls_var; int main(void) { return tls_var; } EOF + unset THREAD if compile_prog "-fPIE -DPIE" "-pie"; then QEMU_CFLAGS="-fPIE -DPIE $QEMU_CFLAGS" LDFLAGS="-pie $LDFLAGS" @@ -3184,6 +3207,22 @@ if test "$trace_backend" = "dtrace"; then fi ########################################## +# check for TLS runtime + +# Some versions of mingw include the "magic" definitions that make +# TLS work, some don't. Check for it. 
+ +if test "$mingw32" = yes; then + cat > $TMPC << EOF +int main(void) { return 0; } +EOF + compile_prog "" "" + if $nm $TMPE | grep _tls_used > /dev/null 2>&1; then + mingw32_tls_runtime=yes + fi +fi + +########################################## # check and set a backend for coroutine # We prefer ucontext, but it's not always possible. The fallback @@ -3677,6 +3716,9 @@ if test "$mingw32" = "yes" ; then version_micro=0 echo "CONFIG_FILEVERSION=$version_major,$version_minor,$version_subminor,$version_micro" >> $config_host_mak echo "CONFIG_PRODUCTVERSION=$version_major,$version_minor,$version_subminor,$version_micro" >> $config_host_mak + if test "$mingw32_tls_runtime" = yes; then + echo "CONFIG_MINGW32_TLS_RUNTIME=y" >> $config_host_mak + fi else echo "CONFIG_POSIX=y" >> $config_host_mak fi @@ -3979,6 +4021,10 @@ if test "$cpuid_h" = "yes" ; then echo "CONFIG_CPUID_H=y" >> $config_host_mak fi +if test "$tls" = "yes" ; then + echo "CONFIG_TLS=y" >> $config_host_mak +fi + if test "$int128" = "yes" ; then echo "CONFIG_INT128=y" >> $config_host_mak fi @@ -4107,6 +4153,7 @@ echo "OBJCC=$objcc" >> $config_host_mak echo "AR=$ar" >> $config_host_mak echo "AS=$as" >> $config_host_mak echo "CPP=$cpp" >> $config_host_mak +echo "NM=$nm" >> $config_host_mak echo "OBJCOPY=$objcopy" >> $config_host_mak echo "LD=$ld" >> $config_host_mak echo "WINDRES=$windres" >> $config_host_mak diff --git a/include/qemu/tls.h b/include/qemu/tls.h index b92ea9d..c878aaa 100644 --- a/include/qemu/tls.h +++ b/include/qemu/tls.h @@ -1,7 +1,7 @@ /* * Abstraction layer for defining and using TLS variables * - * Copyright (c) 2011 Red Hat, Inc + * Copyright (c) 2011, 2013 Red Hat, Inc * Copyright (c) 2011 Linaro Limited * * Authors: @@ -25,28 +25,117 @@ #ifndef QEMU_TLS_H #define QEMU_TLS_H -/* Per-thread variables. Note that we only have implementations - * which are really thread-local on Linux; the dummy implementations - * define plain global variables. 
+#ifdef CONFIG_WIN32 + +/* Do not use GCC's "emutls" path on Windows; it is slower. + * + * The initial contents of TLS variables are placed in the .tls section. + * The linker takes all sections starting with ".tls$", sorts them and puts + * the contents in a single ".tls" section. qemu-thread-win32.c defines + * special symbols in .tls$000 and .tls$ZZZ that represent the beginning + * and end of TLS memory. The linker and run-time library then cooperate + * to copy memory between those symbols in the TLS area of new threads. * - * This means that for the moment use should be restricted to - * per-VCPU variables, which are OK because: - * - the only -user mode supporting multiple VCPU threads is linux-user - * - TCG system mode is single-threaded regarding VCPUs - * - KVM system mode is multi-threaded but limited to Linux + * _tls_index holds the number of our module. The executable should be + * zero, DLLs are numbered 1 and up. The loader fills it in for us. * - * TODO: proper implementations via Win32 .tls sections and - * POSIX pthread_getspecific. + * Thus, Teb->ThreadLocalStoragePointer[_tls_index] is the base of + * the TLS segment for this (thread, module) pair. Each segment has + * the same layout as this module's .tls segment and is initialized + * with the content of the .tls segment; offset 0 is the _tls_start variable. + * So, tls_get_##x passes us the offset of the passed variable relative to + * _tls_start, and we return that same offset plus the base of the segment. 
*/ -#ifdef __linux__ -#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x) -#define DEFINE_TLS(type, x) __thread __typeof__(type) tls__##x -#define tls_var(x) tls__##x + +typedef struct _TEB { + NT_TIB NtTib; + void *EnvironmentPointer; + void *x[3]; + char **ThreadLocalStoragePointer; +} TEB, *PTEB; + +extern int _tls_index; +extern int _tls_start; + +static inline void *tls_var(size_t offset) +{ + PTEB Teb = NtCurrentTeb(); + return (char *)(Teb->ThreadLocalStoragePointer[_tls_index]) + offset; +} + +#define DECLARE_TLS(type, x) \ +extern typeof(type) tls_##x __attribute__((section(".tls$QEMU"))); \ + \ +static inline typeof(type) *tls_get_##x(void) \ +{ \ + return tls_var((ULONG_PTR)&(tls_##x) - (ULONG_PTR)&_tls_start); \ +} \ + \ +static inline typeof(type) *tls_alloc_##x(void) \ +{ \ + typeof(type) *addr = tls_get_##x(); \ + memset((void *)addr, 0, sizeof(type)); \ + return addr; \ +} \ + \ +extern int glue(dummy_, __LINE__) + +#define DEFINE_TLS(type, x) \ +typeof(type) tls_##x __attribute__((section(".tls$QEMU"))) + +#elif defined CONFIG_TLS +#define DECLARE_TLS(type, x) \ +extern __thread typeof(type) x; \ + \ +static inline typeof(type) *tls_get_##x(void) \ +{ \ + return &x; \ +} \ + \ +static inline typeof(type) *tls_alloc_##x(void) \ +{ \ + return &x; \ +} \ + \ +extern int glue(dummy_, __LINE__) + +#define DEFINE_TLS(type, x) \ +__thread typeof(type) x + +#elif defined CONFIG_POSIX +typedef struct QEMUTLSValue { + pthread_key_t k; + pthread_once_t o; +} QEMUTLSValue; + +#define DECLARE_TLS(type, x) \ +extern QEMUTLSValue x; \ +extern void tls_init_##x(void); \ + \ +static inline typeof(type) *tls_get_##x(void) \ +{ \ + return pthread_getspecific(x.k); \ +} \ + \ +static inline typeof(type) *tls_alloc_##x(void) \ +{ \ + void *datum = g_malloc0(sizeof(type)); \ + pthread_once(&x.o, tls_init_##x); \ + pthread_setspecific(x.k, datum); \ + return datum; \ +} \ + \ +extern int glue(dummy_, __LINE__) + +#define DEFINE_TLS(type, x) \ +void tls_init_##x(void) { \ 
+ pthread_key_create(&x.k, g_free); \ +} \ + \ +QEMUTLSValue x = { .o = PTHREAD_ONCE_INIT } + #else -/* Dummy implementations which define plain global variables */ -#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x) -#define DEFINE_TLS(type, x) __typeof__(type) tls__##x -#define tls_var(x) tls__##x +#error No TLS abstraction available on this platform #endif #endif diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 0d6e95c..5d0d40b 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -203,7 +203,7 @@ struct CPUState { extern CPUState *first_cpu; DECLARE_TLS(CPUState *, current_cpu); -#define current_cpu tls_var(current_cpu) +//#define current_cpu tls_var(current_cpu) /** * cpu_paging_enabled: diff --git a/tests/Makefile b/tests/Makefile index 4d68d28..d1defbb 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -47,7 +47,7 @@ gcov-files-test-mul64-y = util/host-utils.c check-unit-y += tests/test-tls$(EXESUF) # all code tested by test-tls is inside tls.h gcov-files-test-tls-y = -check-unit-y += tests/test-int128$(EXESUF) +#check-unit-y += tests/test-int128$(EXESUF) # all code tested by test-int128 is inside int128.h gcov-files-test-int128-y = check-unit-y += tests/rcutorture$(EXESUF) diff --git a/tests/test-tls.c b/tests/test-tls.c new file mode 100644 index 0000000..26a9ec7 --- /dev/null +++ b/tests/test-tls.c @@ -0,0 +1,87 @@ +/* + * Unit-tests for TLS wrappers + * + * Copyright (C) 2013 Red Hat Inc. 
+ * + * Authors: + * Paolo Bonzini <pbonz...@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <glib.h> +#include <errno.h> +#include <string.h> + +#include "qemu-common.h" +#include "qemu/atomic.h" +#include "qemu/thread.h" +#include "qemu/tls.h" + +DECLARE_TLS(volatile long long, cnt); +DEFINE_TLS(volatile long long, cnt); + +#define NUM_THREADS 10 + +static int stop; + +static void *test_thread(void *arg) +{ + volatile long long *p_cnt = tls_alloc_cnt(); + volatile long long **p_ret = arg; + long long exp = 0; + + g_assert(tls_get_cnt() == p_cnt); + *p_ret = p_cnt; + g_assert(*p_cnt == 0); + while (atomic_mb_read(&stop) == 0) { + exp++; + (*p_cnt)++; + g_assert(*tls_get_cnt() == exp); + } + + return NULL; +} + +static void test_tls(void) +{ + volatile long long *addr[NUM_THREADS]; + QemuThread t[NUM_THREADS]; + int i; + + for (i = 0; i < NUM_THREADS; i++) { + qemu_thread_create(&t[i], test_thread, &addr[i], QEMU_THREAD_JOINABLE); + } + g_usleep(1000000); + atomic_mb_set(&stop, 1); + for (i = 0; i < NUM_THREADS; i++) { + qemu_thread_join(&t[i]); + } + for (i = 1; i < NUM_THREADS; i++) { + g_assert(addr[i] != addr[i - 1]); + } +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + g_test_add_func("/tls", test_tls); + return g_test_run(); +} -- 1.8.3.1