This is similar in intent to the async_safe_work mechanism. The main differences are:
- Work is run on a single CPU thread *after* all others are put to sleep - Sleeping threads are woken up by the worker thread upon completing its job - A flag has been added to tcg_ctx so that only one thread can schedule work at a time. The flag is checked every time tb_lock is acquired. - Handles the possibility of CPU threads being created after the existing CPUs are put to sleep. This is easily triggered with many threads on a many-core host in usermode. - Works for both softmmu and usermode Signed-off-by: Emilio G. Cota <c...@braap.org> --- cpu-exec.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++ exec.c | 4 +++ include/exec/exec-all.h | 5 +++ include/qom/cpu.h | 20 +++++++++++ tcg/tcg.h | 1 + translate-all.c | 23 ++++++++++++- 6 files changed, 141 insertions(+), 1 deletion(-) diff --git a/cpu-exec.c b/cpu-exec.c index ff08da8..378ce52 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -393,6 +393,57 @@ static inline void cpu_exit_loop_lock_reset(CPUState *cpu) { } #endif +static inline void cpu_sleep_other(CPUState *cpu, CPUState *curr) +{ + assert(cpu->tcg_sleep_owner == NULL); + qemu_mutex_lock(cpu->tcg_work_lock); + cpu->tcg_sleep_requests++; + cpu->tcg_sleep_owner = curr; + qemu_mutex_unlock(cpu->tcg_work_lock); +#ifdef CONFIG_SOFTMMU + cpu_exit(cpu); +#else + /* cannot call cpu_exit(); cpu->exit_request is not for usermode */ + smp_wmb(); + cpu->tcg_exit_req = 1; +#endif +} + +/* call with no locks held */ +static inline void cpu_sleep_others(CPUState *curr) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + if (cpu == curr) { + continue; + } + cpu_sleep_other(cpu, curr); + } + /* wait until all other threads are out of the execution loop */ + synchronize_rcu(); +} + +static inline void cpu_wake_others(CPUState *curr) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + if (cpu == curr) { + continue; + } + if (cpu->tcg_sleep_owner != curr) { + assert(!cpu->inited); + continue; + } + qemu_mutex_lock(cpu->tcg_work_lock); + cpu->tcg_sleep_requests--; + 
cpu->tcg_sleep_owner = NULL; + qemu_cond_signal(cpu->tcg_work_cond); + qemu_mutex_unlock(cpu->tcg_work_lock); + } +} + /* main execution loop */ int cpu_exec(CPUState *cpu) @@ -410,6 +461,44 @@ int cpu_exec(CPUState *cpu) current_cpu = cpu; + /* + * Prevent threads that were created during a TCG work critical section + * (and that therefore didn't have cpu->tcg_work_owner set) from executing. + * What we do is then to not let them run by sending them out of the CPU + * loop until the tcg_work_pending flag goes down. + */ + if (unlikely(!cpu->inited)) { + tb_lock(); + tb_unlock(); + cpu->inited = true; + } + + if (cpu->tcg_work_func) { + cpu_sleep_others(cpu); + /* + * At this point all existing threads are sleeping. + * With the check above we make sure that threads that might be + * concurrently added at this point won't execute until the end of the + * work window, so we can safely call the work function. + */ + cpu->tcg_work_func(cpu->tcg_work_arg); + cpu->tcg_work_func = NULL; + cpu->tcg_work_arg = NULL; + + /* mark the end of the TCG work critical section */ + tb_lock_nocheck(); + tcg_ctx.tb_ctx.work_pending = false; + tb_unlock(); + cpu_wake_others(cpu); + } + + qemu_mutex_lock(cpu->tcg_work_lock); + assert(cpu->tcg_sleep_requests >= 0); + while (unlikely(cpu->tcg_sleep_requests)) { + qemu_cond_wait(cpu->tcg_work_cond, cpu->tcg_work_lock); + } + qemu_mutex_unlock(cpu->tcg_work_lock); + #ifndef CONFIG_USER_ONLY /* FIXME: user-mode emulation probably needs a similar mechanism as well, * for example for tb_flush. 
diff --git a/exec.c b/exec.c index 58cd096..45a9761 100644 --- a/exec.c +++ b/exec.c @@ -579,6 +579,10 @@ void cpu_exec_init(CPUState *cpu, Error **errp) qemu_mutex_init(&cpu->tb_jmp_cache_lock); seqlock_init(&cpu->tb_jmp_cache_sequence, &cpu->tb_jmp_cache_lock); + cpu->tcg_work_cond = g_malloc(sizeof(*cpu->tcg_work_cond)); + qemu_cond_init(cpu->tcg_work_cond); + cpu->tcg_work_lock = g_malloc(sizeof(*cpu->tcg_work_lock)); + qemu_mutex_init(cpu->tcg_work_lock); #ifndef CONFIG_USER_ONLY cpu->as = &address_space_memory; cpu->thread_id = qemu_get_thread_id(); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 050e820..be8315c 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -198,6 +198,11 @@ struct TBContext { int nb_tbs; /* any access to the tbs or the page table must use this lock */ QemuMutex tb_lock; + /* + * This ensures that only one thread can perform safe work at a time. + * Protected by tb_lock; check the flag right after acquiring the lock. + */ + bool work_pending; /* statistics */ int tb_flush_count; diff --git a/include/qom/cpu.h b/include/qom/cpu.h index ab484be..aba7edb 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -273,6 +273,15 @@ struct CPUState { bool stop; bool stopped; bool cpu_loop_exit_locked; + bool inited; + /* tcg_work_* protected by tcg_work_lock */ + QemuCond *tcg_work_cond; + QemuMutex *tcg_work_lock; + void (*tcg_work_func)(void *arg); + void *tcg_work_arg; + CPUState *tcg_sleep_owner; + int tcg_sleep_requests; + volatile sig_atomic_t exit_request; uint32_t interrupt_request; int singlestep_enabled; @@ -582,6 +591,17 @@ void async_run_safe_work_on_cpu(CPUState *cpu, void (*func)(void *data), bool async_safe_work_pending(void); /** + * cpu_tcg_sched_work: + * @cpu: CPU thread to schedule the work on + * @func: function to be called when all other CPU threads are asleep + * @arg: argument to be passed to @func + * + * Schedule work to be done while all other CPU threads are put to sleep. 
+ * Call with tb_lock held. + */ +void cpu_tcg_sched_work(CPUState *cpu, void (*func)(void *arg), void *arg); + +/** * qemu_get_cpu: * @index: The CPUState@cpu_index value of the CPU to obtain. * diff --git a/tcg/tcg.h b/tcg/tcg.h index 9a873ac..1229f7e 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -596,6 +596,7 @@ void tcg_pool_reset(TCGContext *s); void tcg_pool_delete(TCGContext *s); void tb_lock(void); +void tb_lock_nocheck(void); void tb_unlock(void); bool tb_lock_recursive(void); void tb_lock_reset(void); diff --git a/translate-all.c b/translate-all.c index 8f8c402..f3f7fb2 100644 --- a/translate-all.c +++ b/translate-all.c @@ -133,13 +133,24 @@ TCGContext tcg_ctx; /* translation block context */ __thread int have_tb_lock; -void tb_lock(void) +/* acquire tb_lock without checking for pending work */ +void tb_lock_nocheck(void) { assert(!have_tb_lock); qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); have_tb_lock++; } +void tb_lock(void) +{ + tb_lock_nocheck(); + if (unlikely(tcg_ctx.tb_ctx.work_pending)) { + assert(current_cpu); + current_cpu->exception_index = EXCP_INTERRUPT; + cpu_loop_exit(current_cpu); + } +} + void tb_unlock(void) { assert(have_tb_lock); @@ -961,6 +972,16 @@ static void tb_page_check(void) #endif +void cpu_tcg_sched_work(CPUState *cpu, void (*func)(void *arg), void *arg) +{ + assert(have_tb_lock); + tcg_ctx.tb_ctx.work_pending = true; + cpu->tcg_work_func = func; + cpu->tcg_work_arg = arg; + cpu->exception_index = EXCP_INTERRUPT; + cpu_loop_exit(cpu); +} + static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb) { TranslationBlock *tb1; -- 1.9.1