On 10/05/2011 09:52 AM, Jan Kiszka wrote:
> Yeah, it probably makes sense to build the abstractions around __thread
> so that - one day - we can drop the legacy wrappers.
> Just do not prepend "tls__" in the gcc model
Actually I did that on purpose so that people would not forget get_tls. :)
> (there is also some inconsistency with prefixes in patch 3).
Yep, the attached v2 actually builds. I also needed a small change to
avoid errors with -Wredundant-decls, and I changed it to support arrays with
DECLARE_TLS(int[10], array);
> And avoid leading "_" unless
> they are dictated by the platform.
Ok, I replaced tls_init_thread with tls_init_main_thread and
_tls_init_thread with tls_init_thread.
> And patch 3 needs to update darwin-user/main.c as well.
I think the declaration can just be removed.
> What is the default priority of constructors BTW? You picked the
> highest, will others that do not specify one have the same?
Looks like the prioritized constructors always run _before_ the others,
which is good.
$ cat f.c
int f(void) __attribute__((constructor(101)));
int f(void) { write (1, "101\n", 4); }
int h(void) __attribute__((constructor));
int h(void) { write (1, "default\n", 8); }
int g(void) __attribute__((constructor(102)));
int g(void) { write (1, "102\n", 4); }
int main() { write(1, "main\n", 5); }
$ gcc f.c
$ ./a.out
101
102
default
main
If interested people can test the patches more and submit them more
formally, I'd be very glad. I wrote it for RCU, but of course that one
is not really going to be 1.0 material (even for 9p).
Paolo
>From 497ed0672f7fe08d9654a0e5c11b682bea43a59e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonz...@redhat.com>
Date: Wed, 5 Oct 2011 08:29:39 +0200
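Subject: [PATCH 0/3] TLS wrappers and thread-local cpu_single_env
This series adds TLS wrappers (DECLARE_TLS, DEFINE_TLS, get_tls) to
qemu-thread, backed by __thread, pthread_getspecific or the Win32 .tls
section depending on the platform, and then uses them to make
cpu_single_env thread-local.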
Paolo Bonzini (3):
qemu-threads: add TLS wrappers
Prepare Windows port for thread-local cpu_single_env
Make cpu_single_env thread-local
configure | 20 +++++++++++++++++
coroutine-win32.c | 7 ++++-
cpu-all.h | 4 ++-
cpus.c | 13 +++++++---
exec.c | 2 +-
qemu-thread-posix.c | 42 ++++++++++++++++++++++++++++++++---
qemu-thread-win32.c | 16 +++++++++++++
qemu-tls-gcc.h | 25 +++++++++++++++++++++
qemu-tls-pthread.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
qemu-tls-win32.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 234 insertions(+), 12 deletions(-)
create mode 100644 qemu-tls-gcc.h
create mode 100644 qemu-tls-pthread.h
create mode 100644 qemu-tls-win32.h
--
1.7.6
>From d8c3c4e789f9b86a66042a9181333e1a096b6b93 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonz...@redhat.com>
Date: Tue, 16 Aug 2011 10:37:44 -0700
Subject: [PATCH 1/3] qemu-threads: add TLS wrappers
Win32 emulated TLS is slow and is not available on all versions of GCC;
some versions of Unix only have pthread_getspecific as a means to access
TLS.
Win32 does have decent native TLS support, but GCC does not map
__thread to it. Unlike ELF TLS, however, Win32 TLS variables can be
declared with plain C code, by placing them in the .tls section. For
pthread_getspecific we similarly allocate one memory block per thread;
the offset of each variable within the block is computed at load time,
which is also cheaper than doing a pthread_key_create for each
variable. Not optimal, but it works.
This patch adds wrappers to qemu-thread that will use __thread or
pthread_getspecific on POSIX systems, and the .tls segment on Windows.
It does kinda uglify the declarations, but not too much.
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
configure | 20 +++++++++++++++++
coroutine-win32.c | 7 ++++-
qemu-thread-posix.c | 42 ++++++++++++++++++++++++++++++++---
qemu-thread-win32.c | 16 +++++++++++++
qemu-tls-gcc.h | 25 +++++++++++++++++++++
qemu-tls-pthread.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
qemu-tls-win32.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 221 insertions(+), 6 deletions(-)
create mode 100644 qemu-tls-gcc.h
create mode 100644 qemu-tls-pthread.h
create mode 100644 qemu-tls-win32.h
diff --git a/configure b/configure
index 59b1494..50d7b54 100755
--- a/configure
+++ b/configure
@@ -1215,6 +1215,23 @@ EOF
fi
##########################################
+# __thread check
+
+if test "$mingw32" = "yes" ; then
+ tls_model=win32
+else
+ cat > $TMPC << EOF
+__thread int x;
+int main() { return x; }
+EOF
+ if compile_prog "" "" ; then
+ tls_model=gcc
+ else
+ tls_model=pthread
+ fi
+fi
+
+##########################################
# zlib check
if test "$zlib" != "no" ; then
@@ -2697,6 +2714,7 @@ echo "Documentation $docs"
[ ! -z "$uname_release" ] && \
echo "uname -r $uname_release"
echo "NPTL support $nptl"
+echo "TLS support $tls_model"
echo "GUEST_BASE $guest_base"
echo "PIE user targets $user_pie"
echo "vde support $vde"
@@ -3580,6 +3598,8 @@ if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then
esac
fi
+symlink $source_path/qemu-tls-$tls_model.h qemu-tls.h
+
# use included Linux headers
if test "$linux" = "yes" ; then
includes="-I\$(SRC_PATH)/linux-headers $includes"
diff --git a/coroutine-win32.c b/coroutine-win32.c
index 4179609..708e220 100644
--- a/coroutine-win32.c
+++ b/coroutine-win32.c
@@ -24,6 +24,7 @@
#include "qemu-common.h"
#include "qemu-coroutine-int.h"
+#include "qemu-tls.h"
typedef struct
{
@@ -33,8 +34,10 @@ typedef struct
CoroutineAction action;
} CoroutineWin32;
-static __thread CoroutineWin32 leader;
-static __thread Coroutine *current;
+static DEFINE_TLS(CoroutineWin32, tls_leader);
+static DEFINE_TLS(Coroutine *, tls_current);
+#define leader get_tls(tls_leader)
+#define current get_tls(tls_current)
CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
CoroutineAction action)
diff --git a/qemu-thread-posix.c b/qemu-thread-posix.c
index ac3c0c9..acd04ff 100644
--- a/qemu-thread-posix.c
+++ b/qemu-thread-posix.c
@@ -18,6 +18,9 @@
#include <stdint.h>
#include <string.h>
#include "qemu-thread.h"
+#include "qemu-common.h"
+#include "qemu-tls.h"
+#include "qemu-barrier.h"
static void error_exit(int err, const char *msg)
{
@@ -115,18 +118,44 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
error_exit(err, __func__);
}
+size_t tls_size;
+pthread_key_t tls_key;
+
+static void __attribute__((constructor(102))) tls_init_main_thread(void)
+{
+ /* It's easier to always create the key, even if using GCC tls. */
+ pthread_key_create(&tls_key, g_free);
+ tls_init_thread();
+}
+
+typedef struct QemuThreadData {
+ void *(*start_routine)(void *);
+ void *arg;
+} QemuThreadData;
+
+static void *start_routine_wrapper(void *arg)
+{
+ QemuThreadData args = *(QemuThreadData *) arg;
+ g_free(arg);
+ tls_init_thread();
+ return args.start_routine(args.arg);
+}
+
void qemu_thread_create(QemuThread *thread,
- void *(*start_routine)(void*),
+ void *(*start_routine)(void *),
void *arg)
{
+ sigset_t set, oldset;
+ QemuThreadData *args = g_malloc(sizeof(QemuThreadData));
int err;
- /* Leave signal handling to the iothread. */
- sigset_t set, oldset;
+ args->start_routine = start_routine;
+ args->arg = arg;
+ /* Leave signal handling to the iothread. */
sigfillset(&set);
pthread_sigmask(SIG_SETMASK, &set, &oldset);
- err = pthread_create(&thread->thread, NULL, start_routine, arg);
+ err = pthread_create(&thread->thread, NULL, start_routine_wrapper, args);
if (err)
error_exit(err, __func__);
diff --git a/qemu-thread-win32.c b/qemu-thread-win32.c
index db8e744..118d92f 100644
--- a/qemu-thread-win32.c
+++ b/qemu-thread-win32.c
@@ -16,6 +16,22 @@
#include <assert.h>
#include <limits.h>
+/* TLS support. */
+
+int __attribute__((section(".tls$000"))) _tls_start = 0;
+int __attribute__((section(".tls$ZZZ"))) _tls_end = 0;
+int _tls_index = 0;
+
+const IMAGE_TLS_DIRECTORY _tls_used __attribute__((used, section(".rdata$T"))) = {
+ (ULONG)(ULONG_PTR) &_tls_start, /* start of tls data */
+ (ULONG)(ULONG_PTR) &_tls_end, /* end of tls data */
+ (ULONG)(ULONG_PTR) &_tls_index, /* address of tls_index */
+ (ULONG) 0, /* pointer to callbacks */
+ (ULONG) 0, /* size of tls zero fill */
+ (ULONG) 0 /* characteristics */
+};
+
+
static void error_exit(int err, const char *msg)
{
char *pstr;
diff --git a/qemu-tls-gcc.h b/qemu-tls-gcc.h
new file mode 100644
index 0000000..8cff148
--- /dev/null
+++ b/qemu-tls-gcc.h
@@ -0,0 +1,24 @@
+/*
+ * TLS with __thread
+ *
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Authors:
+ * Paolo Bonzini <pbonz...@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_TLS_GCC_H
+#define QEMU_TLS_GCC_H
+
+#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x)
+#define DEFINE_TLS(type, x) __thread __typeof__(type) tls__##x
+#define get_tls(x) tls__##x
+
+static inline size_t tls_init(size_t size, size_t alignment) { return 0; }
+static inline void tls_init_thread(void) {}
+
+#endif
diff --git a/qemu-tls-pthread.h b/qemu-tls-pthread.h
new file mode 100644
index 0000000..ef97528
--- /dev/null
+++ b/qemu-tls-pthread.h
@@ -0,0 +1,59 @@
+/*
+ * TLS with pthread_getspecific
+ *
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Authors:
+ * Paolo Bonzini <pbonz...@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_TLS_PTHREAD_H
+#define QEMU_TLS_PTHREAD_H
+
+#include <pthread.h>
+#include <glib.h>
+
+#define DECLARE_TLS(type, x) \
+ extern size_t tls_offset__##x; \
+ extern __typeof__(type) *tls_dummy__##x(void)
+
+#define DEFINE_TLS(type, x) \
+ size_t tls_offset__##x; \
+ static void __attribute__((constructor(101))) tls_init__##x(void) { \
+ tls_offset__##x = tls_init(sizeof(type), __alignof__(type)); \
+ } \
+ extern inline __attribute__((__gnu_inline__)) __typeof__(type) *tls_dummy__##x(void) { \
+ return NULL; \
+ } \
+ extern size_t tls_swallow_semicolon__##x
+
+extern size_t tls_size;
+extern pthread_key_t tls_key;
+
+static inline size_t tls_init(size_t size, size_t alignment)
+{
+ size_t tls_offset = (tls_size + alignment - 1) & -alignment;
+ tls_size = tls_offset + size;
+ return tls_offset;
+}
+
+static inline void tls_init_thread(void)
+{
+ void *mem = tls_size == 0 ? NULL : g_malloc0(tls_size);
+ pthread_setspecific(tls_key, mem);
+}
+
+static inline __attribute__((__const__)) void *_get_tls(size_t offset)
+{
+ char *base = pthread_getspecific(tls_key);
+ return &base[offset];
+}
+
+#define get_tls(x) \
+ (*(__typeof__(tls_dummy__##x())) _get_tls(tls_offset__##x))
+
+#endif
diff --git a/qemu-tls-win32.h b/qemu-tls-win32.h
new file mode 100644
index 0000000..d04d48b
--- /dev/null
+++ b/qemu-tls-win32.h
@@ -0,0 +1,59 @@
+/*
+ * TLS with Win32 .tls sections
+ *
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Authors:
+ * Paolo Bonzini <pbonz...@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_TLS_WIN32_H
+#define QEMU_TLS_WIN32_H
+
+#include <windows.h>
+#include <winnt.h>
+
+typedef struct _TEB {
+ NT_TIB NtTib;
+ void *EnvironmentPointer;
+ void *x[3];
+ char **ThreadLocalStoragePointer;
+} TEB, *PTEB;
+
+/* 1) The initial contents of TLS variables are placed in the .tls section. */
+
+#define DECLARE_TLS(type, x) extern DEFINE_TLS(type, x)
+#define DEFINE_TLS(type, x) __typeof__(type) tls__##x __attribute__((section(".tls$AAA")))
+
+/* 2) _tls_index holds the number of our module. The executable should be
+ zero, DLLs are numbered 1 and up. The loader fills it in for us. */
+
+extern int _tls_index;
+extern int _tls_start;
+static inline void tls_init_thread(void) {}
+
+/* 3) Thus, Teb->ThreadLocalStoragePointer[_tls_index] is the base of
+ the TLS segment for this (thread, module) pair. Each segment has
+ the same layout as this module's .tls segment and is initialized
+ with the content of the .tls segment; offset 0 is the _tls_start variable.
+ So, get_tls passes us the offset of the passed variable relative to
+ _tls_start, and we return that same offset plus the base of segment. */
+
+static inline __attribute__((__const__)) void *_get_tls(size_t offset)
+{
+ PTEB Teb = NtCurrentTeb();
+ return (char *)(Teb->ThreadLocalStoragePointer[_tls_index]) + offset;
+}
+
+/* 4) get_tls, in addition to computing the offset, returns an lvalue.
+ "I got it. Magic." */
+
+#define get_tls(x) \
+ (*(__typeof__(tls__##x) *) \
+ _get_tls((ULONG_PTR)&(tls__##x) - (ULONG_PTR)&_tls_start))
+
+#endif
--
1.7.6
>From b10531473a833cf5e925f00461134b0bcd2295bb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonz...@redhat.com>
Date: Mon, 29 Aug 2011 17:03:55 +0200
Subject: [PATCH 2/3] Prepare Windows port for thread-local cpu_single_env
Windows does not execute cpu_signal in VCPU-thread context,
so it won't be able to use cpu_single_env there. However,
it has the CPUState available, so nothing is lost.
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
cpus.c | 13 +++++++++----
1 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/cpus.c b/cpus.c
index 8978779..822ce7a 100644
--- a/cpus.c
+++ b/cpus.c
@@ -176,10 +176,10 @@ static void cpu_handle_guest_debug(CPUState *env)
env->stopped = 1;
}
-static void cpu_signal(int sig)
+static inline void do_cpu_kick(CPUState *env)
{
- if (cpu_single_env) {
- cpu_exit(cpu_single_env);
+ if (env) {
+ cpu_exit(env);
}
exit_request = 1;
}
@@ -437,6 +437,11 @@ static void qemu_kvm_init_cpu_signals(CPUState *env)
}
}
+static void cpu_signal(int sig)
+{
+ do_cpu_kick(cpu_single_env);
+}
+
static void qemu_tcg_init_cpu_signals(void)
{
sigset_t set;
@@ -708,7 +713,7 @@ static void qemu_cpu_kick_thread(CPUState *env)
#else /* _WIN32 */
if (!qemu_cpu_is_self(env)) {
SuspendThread(env->thread->thread);
- cpu_signal(0);
+ do_cpu_kick(env);
ResumeThread(env->thread->thread);
}
#endif
--
1.7.6
>From 6dd053d0acc0f0334432259d989329a4c688fe63 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonz...@redhat.com>
Date: Mon, 29 Aug 2011 17:04:01 +0200
Subject: [PATCH 3/3] Make cpu_single_env thread-local
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
cpu-all.h | 4 +++-
darwin-user/main.c | 2 --
exec.c | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/cpu-all.h b/cpu-all.h
index 42a5fa0..e37ebfc 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -20,6 +20,7 @@
#define CPU_ALL_H
#include "qemu-common.h"
+#include "qemu-tls.h"
#include "cpu-common.h"
/* some important defines:
@@ -334,7 +335,8 @@ void cpu_dump_statistics(CPUState *env, FILE *f, fprintf_function cpu_fprintf,
void QEMU_NORETURN cpu_abort(CPUState *env, const char *fmt, ...)
GCC_FMT_ATTR(2, 3);
extern CPUState *first_cpu;
-extern CPUState *cpu_single_env;
+DECLARE_TLS(CPUState *,tls_cpu_single_env);
+#define cpu_single_env get_tls(tls_cpu_single_env)
/* Flags for use in ENV->INTERRUPT_PENDING.
diff --git a/darwin-user/main.c b/darwin-user/main.c
index 1a881a0..c0f14f8 100644
--- a/darwin-user/main.c
+++ b/darwin-user/main.c
@@ -729,8 +729,6 @@ static void usage(void)
/* XXX: currently only used for async signals (see signal.c) */
CPUState *global_env;
-/* used only if single thread */
-CPUState *cpu_single_env = NULL;
/* used to free thread contexts */
TaskState *first_task_state;
diff --git a/exec.c b/exec.c
index d0cbf15..afc5fe3 100644
--- a/exec.c
+++ b/exec.c
@@ -120,7 +120,7 @@ static MemoryRegion *system_io;
CPUState *first_cpu;
/* current CPU in the current thread. It is only valid inside
cpu_exec() */
-CPUState *cpu_single_env;
+DEFINE_TLS(CPUState *,tls_cpu_single_env);
/* 0 = Do not count executed instructions.
1 = Precise instruction counting.
2 = Adaptive rate instruction counting. */
--
1.7.6