This backend is faster (100ns vs 150ns per switch on my laptop), but more importantly it will make it possible to add CET support.
Unlike the ucontext backend, it does not save any register state; all registers are clobbered in the CO_SWITCH asm, and as a result the compiler automatically saves/restores caller-save registers in qemu_coroutine_switch. This however means that it does not have register mangling, which is nicely provided by glibc to the ucontext backend. Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- configure | 10 ++ scripts/qemugdb/coroutine.py | 5 +- scripts/qemugdb/coroutine_x86.py | 21 +++ util/Makefile.objs | 1 + util/coroutine-x86.c | 220 +++++++++++++++++++++++++++++++ 5 files changed, 257 insertions(+), 2 deletions(-) create mode 100644 scripts/qemugdb/coroutine_x86.py create mode 100644 util/coroutine-x86.c diff --git a/configure b/configure index cab830a4c9..42a7e479fd 100755 --- a/configure +++ b/configure @@ -5098,6 +5098,8 @@ fi if test "$coroutine" = ""; then if test "$mingw32" = "yes"; then coroutine=win32 + elif test "$cpu" = "x86_64"; then + coroutine=x86 elif test "$ucontext_works" = "yes"; then coroutine=ucontext else @@ -5123,6 +5125,14 @@ else error_exit "only the 'windows' coroutine backend is valid for Windows" fi ;; + x86) + if test "$mingw32" = "yes"; then + error_exit "only the 'windows' coroutine backend is valid for Windows" + fi + if test "$cpu" != "x86_64"; then + error_exit "the 'x86' backend is only valid for x86_64 hosts" + fi + ;; *) error_exit "unknown coroutine backend $coroutine" ;; diff --git a/scripts/qemugdb/coroutine.py b/scripts/qemugdb/coroutine.py index db2753d949..f716db22bb 100644 --- a/scripts/qemugdb/coroutine.py +++ b/scripts/qemugdb/coroutine.py @@ -10,14 +10,15 @@ # This work is licensed under the terms of the GNU GPL, version 2 # or later. See the COPYING file in the top-level directory. -from . import coroutine_ucontext +from . 
import coroutine_ucontext, coroutine_x86 import gdb VOID_PTR = gdb.lookup_type('void').pointer() UINTPTR_T = gdb.lookup_type('uintptr_t') backends = { - 'CoroutineUContext': coroutine_ucontext + 'CoroutineUContext': coroutine_ucontext, + 'CoroutineX86': coroutine_x86 } def coroutine_backend(): diff --git a/scripts/qemugdb/coroutine_x86.py b/scripts/qemugdb/coroutine_x86.py new file mode 100644 index 0000000000..05f830cdb8 --- /dev/null +++ b/scripts/qemugdb/coroutine_x86.py @@ -0,0 +1,21 @@ +#!/usr/bin/python + +# GDB debugging support +# +# Copyright 2019 Red Hat, Inc. +# +# Authors: +# Paolo Bonzini <pbonz...@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +import gdb + +U64_PTR = gdb.lookup_type('uint64_t').pointer() + +def get_coroutine_regs(addr): + addr = addr.cast(gdb.lookup_type('CoroutineX86').pointer()) + rsp = addr['sp'].cast(U64_PTR) + return {'rsp': rsp, + 'rip': rsp.dereference()} diff --git a/util/Makefile.objs b/util/Makefile.objs index 835fcd69e2..0808d86a19 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -38,6 +38,7 @@ util-obj-$(CONFIG_MEMBARRIER) += sys_membarrier.o util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o util-obj-y += qemu-coroutine-sleep.o util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o +coroutine-x86.o-cflags := -mno-red-zone util-obj-y += buffer.o util-obj-y += timed-average.o util-obj-y += base64.o diff --git a/util/coroutine-x86.c b/util/coroutine-x86.c new file mode 100644 index 0000000000..bcb9666700 --- /dev/null +++ b/util/coroutine-x86.c @@ -0,0 +1,220 @@ +/* + * x86-specific coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori <anth...@codemonkey.ws> + * Copyright (C) 2011 Kevin Wolf <kw...@redhat.com> + * Copyright (C) 2019 Paolo Bonzini <pbonz...@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the 
GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/coroutine_int.h" + +#ifdef CONFIG_VALGRIND_H +#include <valgrind/valgrind.h> +#endif + +#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer) +#ifdef CONFIG_ASAN_IFACE_FIBER +#define CONFIG_ASAN 1 +#include <sanitizer/asan_interface.h> +#endif +#endif + +#define COROUTINE_SHADOW_STACK_SIZE 4096 + +typedef struct { + Coroutine base; + void *sp; + + void *stack; + size_t stack_size; + +#ifdef CONFIG_VALGRIND_H + unsigned int valgrind_stack_id; +#endif +} CoroutineX86; + +/** + * Per-thread coroutine bookkeeping + */ +static __thread CoroutineX86 leader; +static __thread Coroutine *current; + +static void finish_switch_fiber(void *fake_stack_save) +{ +#ifdef CONFIG_ASAN + const void *bottom_old; + size_t size_old; + + __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old); + + if (!leader.stack) { + leader.stack = (void *)bottom_old; + leader.stack_size = size_old; + } +#endif +} + +static void start_switch_fiber(void **fake_stack_save, + const void *bottom, size_t size) +{ +#ifdef CONFIG_ASAN + __sanitizer_start_switch_fiber(fake_stack_save, bottom, size); +#endif +} + +/* + * We hardcode all operands to specific registers so that we can write down all the + * others in the clobber list. 
Note that action also needs to be hardcoded so that + * it is the same register in all expansions of this macro. Also, we use %rdi + * for the coroutine because that is the ABI's first argument register; + * coroutine_trampoline can then retrieve the current coroutine from there. + * + * Note that push and call would clobber the red zone. Makefile.objs compiles this + * file with -mno-red-zone. The alternative is to subtract/add 128 bytes from rsp + * around the switch, with slightly lower cache performance. + */ +#define CO_SWITCH(from, to, action, jump) ({ \ + int action_ = action; \ + void *from_ = from; \ + void *to_ = to; \ + asm volatile( \ + ".cfi_remember_state\n" \ + "pushq %%rbp\n" /* save frame register on source stack */ \ + ".cfi_adjust_cfa_offset 8\n" \ + ".cfi_rel_offset %%rbp, 0\n" \ + "call 1f\n" /* switch continues at label 1 */ \ + "jmp 2f\n" /* switch back continues at label 2 */ \ + \ + "1: .cfi_adjust_cfa_offset 8\n" \ + "movq %%rsp, %c[SP](%[FROM])\n" /* save source SP */ \ + "movq %c[SP](%[TO]), %%rsp\n" /* load destination SP */ \ + jump "\n" /* coroutine switch */ \ + \ + "2: .cfi_adjust_cfa_offset -8\n" \ + "popq %%rbp\n" \ + ".cfi_adjust_cfa_offset -8\n" \ + ".cfi_restore_state\n" \ + : "+a" (action_), [FROM] "+b" (from_), [TO] "+D" (to_) \ + : [SP] "i" (offsetof(CoroutineX86, sp)) \ + : "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "memory"); \ + action_; \ +}) + +static void __attribute__((__used__)) coroutine_trampoline(CoroutineX86 *self) +{ + finish_switch_fiber(NULL); + + while (true) { + Coroutine *co = &self->base; + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + co->entry(co->entry_arg); + } +} + +Coroutine *qemu_coroutine_new(void) +{ + CoroutineX86 *co; + void *fake_stack_save = NULL; + + co = g_malloc0(sizeof(*co)); + co->stack_size = COROUTINE_STACK_SIZE; + co->stack = qemu_alloc_stack(&co->stack_size); + co->sp = co->stack + co->stack_size; + +#ifdef CONFIG_VALGRIND_H + 
co->valgrind_stack_id = + VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size); +#endif + + /* Immediately enter the coroutine once to pass it its address as the argument */ + co->base.caller = qemu_coroutine_self(); + start_switch_fiber(&fake_stack_save, co->stack, co->stack_size); + CO_SWITCH(current, co, 0, "jmp coroutine_trampoline"); + finish_switch_fiber(fake_stack_save); + co->base.caller = NULL; + + return &co->base; +} + +#ifdef CONFIG_VALGRIND_H +#if defined(CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE) && !defined(__clang__) +/* Work around an unused variable in the valgrind.h macro... */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif +static inline void valgrind_stack_deregister(CoroutineX86 *co) +{ + VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id); +} +#if defined(CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif +#endif + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineX86 *co = DO_UPCAST(CoroutineX86, base, co_); + +#ifdef CONFIG_VALGRIND_H + valgrind_stack_deregister(co); +#endif + + qemu_free_stack(co->stack, co->stack_size); + g_free(co); +} + +/* + * This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). If we allow it to do that then it + * hoists the code to get the address of the TLS variable "current" + * out of the while() loop. This is an invalid transformation because + * qemu_coroutine_switch() may be called when running thread A but + * return in thread B, and so we might be in a different thread + * context each time round the loop. + */ +CoroutineAction __attribute__((noinline)) +qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineX86 *from = DO_UPCAST(CoroutineX86, base, from_); + CoroutineX86 *to = DO_UPCAST(CoroutineX86, base, to_); + void *fake_stack_save = NULL; + + current = to_; + + start_switch_fiber(action == COROUTINE_TERMINATE ? 
+ NULL : &fake_stack_save, to->stack, to->stack_size); + action = CO_SWITCH(from, to, action, "ret"); + finish_switch_fiber(fake_stack_save); + + return action; +} + +Coroutine *qemu_coroutine_self(void) +{ + if (!current) { + current = &leader.base; + } + return current; +} + +bool qemu_in_coroutine(void) +{ + return current && current->caller; +} -- 2.20.1