This adds a debug configure switch that enables monitoring of the maximum stack size used by any coroutine.
Signed-off-by: Peter Lieven <p...@kamp.de> --- configure | 18 ++++++++++++++++++ util/coroutine-ucontext.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/configure b/configure index 5929aba..82bcc25 100755 --- a/configure +++ b/configure @@ -296,6 +296,7 @@ libiscsi="" libnfs="" coroutine="" coroutine_pool="" +coroutine_stack_size_debug="no" seccomp="" glusterfs="" glusterfs_xlator_opt="no" @@ -1004,6 +1005,8 @@ for opt do ;; --enable-coroutine-pool) coroutine_pool="yes" ;; + --enable-coroutine-stack-size-debug) coroutine_stack_size_debug="yes" + ;; --disable-docs) docs="no" ;; --enable-docs) docs="yes" @@ -1361,6 +1364,8 @@ disabled with --disable-FEATURE, default is enabled if available: (for reading bzip2-compressed dmg images) seccomp seccomp support coroutine-pool coroutine freelist (better performance) + coroutine-stack-size-debug + report coroutine max stack usage (only for debugging) glusterfs GlusterFS backend archipelago Archipelago backend tpm TPM support @@ -4298,6 +4303,15 @@ fi if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)" fi +if test "$coroutine_stack_size_debug" = "yes"; then + if test "$coroutine" != "ucontext"; then + error_exit "coroutine stack size debugging currently only works with ucontext" + fi + if test "$coroutine_pool" = "yes"; then + echo "WARN: disabling coroutine pool for stack size debugging" + coroutine_pool=no + fi +fi ########################################## # check if we have open_by_handle_at @@ -4866,6 +4880,7 @@ echo "QGA MSI support $guest_agent_msi" echo "seccomp support $seccomp" echo "coroutine backend $coroutine" echo "coroutine pool $coroutine_pool" +echo "coroutine stack size debug $coroutine_stack_size_debug" echo "GlusterFS support $glusterfs" echo "Archipelago support $archipelago" echo "gcov $gcov_tool" @@ -5335,6 +5350,9 @@ if test "$coroutine_pool" = 
"yes" ; then else echo "CONFIG_COROUTINE_POOL=0" >> $config_host_mak fi +if test "$coroutine_stack_size_debug" = "yes" ; then + echo "CONFIG_COROUTINE_STACK_SIZE_DEBUG=y" >> $config_host_mak +fi if test "$open_by_handle_at" = "yes" ; then echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c index 841e7db..27c61f3 100644 --- a/util/coroutine-ucontext.c +++ b/util/coroutine-ucontext.c @@ -31,6 +31,10 @@ #include <valgrind/valgrind.h> #endif +#ifdef CONFIG_COROUTINE_STACK_SIZE_DEBUG +#include "qemu/error-report.h" +#endif + typedef struct { Coroutine base; void *stack; @@ -48,6 +52,10 @@ typedef struct { static __thread CoroutineUContext leader; static __thread Coroutine *current; +#ifdef CONFIG_COROUTINE_STACK_SIZE_DEBUG +static uint32_t max_stack_usage; +#endif + /* * va_args to makecontext() must be type 'int', so passing * the pointer we need may require several int args. This @@ -88,6 +96,9 @@ Coroutine *qemu_coroutine_new(void) ucontext_t old_uc, uc; sigjmp_buf old_env; union cc_arg arg = {0}; +#ifdef CONFIG_COROUTINE_STACK_SIZE_DEBUG + void *ptr; +#endif /* The ucontext functions preserve signal masks which incurs a * system call overhead. 
sigsetjmp(buf, 0)/siglongjmp() does not @@ -118,6 +129,13 @@ Coroutine *qemu_coroutine_new(void) co->stack = g_malloc(stack_size); #endif +#ifdef CONFIG_COROUTINE_STACK_SIZE_DEBUG + for (ptr = co->stack + getpagesize(); + ptr < co->stack + COROUTINE_STACK_SIZE; ptr += sizeof(u_int32_t)) { + *(u_int32_t *)ptr = 0xdeadbeaf; + } +#endif + co->base.entry_arg = &old_env; /* stash away our jmp_buf */ uc.uc_link = &old_uc; @@ -161,6 +179,20 @@ void qemu_coroutine_delete(Coroutine *co_) { CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); +#ifdef CONFIG_COROUTINE_STACK_SIZE_DEBUG + void *ptr; + for (ptr = co->stack + getpagesize(); + ptr < co->stack + COROUTINE_STACK_SIZE; ptr += sizeof(u_int32_t)) { + if (*(u_int32_t *)ptr != 0xdeadbeaf) { + break; + } + } + /* we only want to estimate the max stack usage, the OR will overestimate + * the stack usage, but this is ok here and avoids the usage of a mutex */ + atomic_or(&max_stack_usage, + COROUTINE_STACK_SIZE - (uintptr_t) (ptr - co->stack)); +#endif + #ifdef CONFIG_VALGRIND_H valgrind_stack_deregister(co); #endif @@ -210,3 +242,11 @@ bool qemu_in_coroutine(void) { return current && current->caller; } + +#ifdef CONFIG_COROUTINE_STACK_SIZE_DEBUG +static void __attribute__((destructor)) print_max_stack_usage(void) +{ + error_report("coroutine-ucontext: max stack usage was less or equal to " + "%"PRIu32" bytes.", max_stack_usage); +} +#endif -- 1.9.1