Win32 emulated TLS is slow and is not available in all versions of GCC. Win32 itself does have decent native TLS support; it is just that GCC does not map __thread to it. The good news is that it is perfectly possible to declare TLS variables with simple C code!
This patch adds wrappers to qemu-thread that will use __thread on POSIX systems, and the .tls segment on Windows. Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- It does kinda uglify the declarations, but not too much IMO. Do we want to go this way? It will certainly limit the number of bug reports we get. coroutine-win32.c | 7 +++++-- qemu-thread-posix.h | 4 ++++ qemu-thread-win32.c | 17 +++++++++++++++++ qemu-thread-win32.h | 40 +++++++++++++++++++++++++++++++++++++++- qemu-thread.h | 2 ++ 5 files changed, 67 insertions(+), 3 deletions(-) diff --git a/coroutine-win32.c b/coroutine-win32.c index 0e29448..7f58432 100644 --- a/coroutine-win32.c +++ b/coroutine-win32.c @@ -24,6 +24,7 @@ #include "qemu-common.h" #include "qemu-coroutine-int.h" +#include "qemu-thread.h" typedef struct { @@ -33,8 +34,10 @@ typedef struct CoroutineAction action; } CoroutineWin32; -static __thread CoroutineWin32 leader; -static __thread Coroutine *current; +static DEFINE_TLS(CoroutineWin32, tls_leader); +static DEFINE_TLS(Coroutine *, tls_current); +#define leader get_tls(tls_leader) +#define current get_tls(tls_current) CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, CoroutineAction action) diff --git a/qemu-thread-posix.h b/qemu-thread-posix.h index d781ca6..2a302da 100644 --- a/qemu-thread-posix.h +++ b/qemu-thread-posix.h @@ -27,4 +27,8 @@ struct QemuThread { pthread_t thread; }; +#define DEFINE_TLS(type, x) __thread type x +#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x) +#define get_tls(x) (x) + #endif diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c index f5891bd..3eb6b89 100644 --- a/qemu-thread-win32.c +++ b/qemu-thread-win32.c @@ -17,6 +17,23 @@ #include <assert.h> #include <limits.h> +/* TLS support, not exported by the mingw run-time library. 
*/ + +int __attribute__((section(".tls$000"))) _tls_start = 0; +int __attribute__((section(".tls$ZZZ"))) _tls_end = 0; +int _tls_index = 0; + +const IMAGE_TLS_DIRECTORY _tls_used __attribute__((used, section(".rdata$T"))) = +{ + (ULONG)(ULONG_PTR) &_tls_start, // start of tls data + (ULONG)(ULONG_PTR) &_tls_end, // end of tls data + (ULONG)(ULONG_PTR) &_tls_index, // address of tls_index + (ULONG) 0, // pointer to callbacks + (ULONG) 0, // size of tls zero fill + (ULONG) 0 // characteristics +}; + + static void error_exit(int err, const char *msg) { char *pstr; diff --git a/qemu-thread-win32.h b/qemu-thread-win32.h index 6cb248f..2ffe7de 100644 --- a/qemu-thread-win32.h +++ b/qemu-thread-win32.h @@ -1,6 +1,7 @@ #ifndef __QEMU_THREAD_WIN32_H #define __QEMU_THREAD_WIN32_H 1 -#include "windows.h" +#include <windows.h> +#include <winnt.h> struct QemuMutex { CRITICAL_SECTION lock; @@ -27,4 +28,41 @@ struct QemuThread { void *ret; }; +typedef struct _TEB { + _NT_TIB NtTib; + void* EnvironmentPointer; + void *x[3]; + char **ThreadLocalStoragePointer; +} TEB, *PTEB; + +/* 1) The initial contents TLS variables is placed in the .tls section. */ + +#define DEFINE_TLS(type, x) type tls__##x __attribute__((section(".tls$BBB"))) + +/* 2) _tls_index holds the number of our module. The executable should be + zero, DLLs are numbered 1 and up. The loader fills it in for us. */ + +extern int _tls_index; +extern int _tls_start; + +/* 3) Thus, Teb->ThreadLocalStoragePointer[_tls_index] is the base of + the TLS segment for this (thread, module) pair. Each segment has + the same layout as this module's .tls segment and is initialized + with the content of the .tls segment; 0 is the _tls_start variable. + So, get_tls passes us the offset of the passed variable relative to + _tls_start, and we return that same offset plus the base of segment. 
*/ + +static inline __attribute__((__pure__)) void *_get_tls(int offset) +{ + PTEB Teb = NtCurrentTeb(); + return (char *)(Teb->ThreadLocalStoragePointer[_tls_index]) + offset; +} + +/* 4) get_tls, in addition to computing the offset, returns an lvalue. + "I got it. Magic." */ + +#define get_tls(x) \ + (*(__typeof__(tls__##x) *) \ + _get_tls((ULONG_PTR)&(tls__##x) - (ULONG_PTR)&_tls_start)) + #endif diff --git a/qemu-thread.h b/qemu-thread.h index ae75638..858c8cc 100644 --- a/qemu-thread.h +++ b/qemu-thread.h @@ -49,4 +49,6 @@ void qemu_thread_get_self(QemuThread *thread); int qemu_thread_is_self(QemuThread *thread); void qemu_thread_exit(void *retval); +#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x) + #endif -- 1.7.6