The TCG code cache consists of multiple regions shared among vCPUs in multi-threaded TCG mode. For cold-plugged vCPUs, these regions are sized and allocated during initialization, in `tcg_register_thread()`, when the vCPUs are realized. Similarly, regions must also be allocated for vCPUs that are hot-plugged later.
If region allocation fails for hot-plugged vCPUs—due to the code cache being under stress—the TCG code cache must be flushed to create space for the newly hot-plugged vCPU. The only safe way to perform `tb_flush()` is to execute it synchronously within the `cpu_exec()` loop. Reported-by: Miguel Luis <miguel.l...@oracle.com> Signed-off-by: Miguel Luis <miguel.l...@oracle.com> Signed-off-by: Salil Mehta <salil.me...@huawei.com> --- accel/tcg/tcg-accel-ops-mttcg.c | 2 +- accel/tcg/tcg-accel-ops-rr.c | 2 +- include/tcg/startup.h | 6 ++++++ include/tcg/tcg.h | 1 + tcg/region.c | 14 ++++++++++++++ tcg/tcg.c | 13 ++++++++++++- 6 files changed, 35 insertions(+), 3 deletions(-) diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index 49814ec4af..ab2f79d2c7 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -74,7 +74,7 @@ static void *mttcg_cpu_thread_fn(void *arg) force_rcu.notifier.notify = mttcg_force_rcu; force_rcu.cpu = cpu; rcu_add_force_rcu_notifier(&force_rcu.notifier); - tcg_register_thread(); + tcg_register_thread(cpu); bql_lock(); qemu_thread_get_self(cpu->thread); diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 8ebadf8e9e..953231837c 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -186,7 +186,7 @@ static void *rr_cpu_thread_fn(void *arg) rcu_register_thread(); force_rcu.notify = rr_force_rcu; rcu_add_force_rcu_notifier(&force_rcu); - tcg_register_thread(); + tcg_register_thread(cpu); bql_lock(); qemu_thread_get_self(cpu->thread); diff --git a/include/tcg/startup.h b/include/tcg/startup.h index f71305765c..a565071516 100644 --- a/include/tcg/startup.h +++ b/include/tcg/startup.h @@ -25,6 +25,8 @@ #ifndef TCG_STARTUP_H #define TCG_STARTUP_H +#include "hw/core/cpu.h" + /** * tcg_init: Initialize the TCG runtime * @tb_size: translation buffer size @@ -43,7 +45,11 @@ void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus); * accelerator's 
init_machine() method) must register with this * function before initiating translation. */ +#ifdef CONFIG_USER_ONLY void tcg_register_thread(void); +#else +void tcg_register_thread(CPUState *cpu); +#endif /** * tcg_prologue_init(): Generate the code for the TCG prologue diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 21d5884741..e3328cc600 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -518,6 +518,7 @@ struct TCGContext { /* Track which vCPU triggers events */ CPUState *cpu; /* *_trans */ + bool tbflush_pend; /* TB flush pending due to vCPU hotplug */ /* These structures are private to tcg-target.c.inc. */ #ifdef TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/region.c b/tcg/region.c index 478ec051c4..9007bfd71e 100644 --- a/tcg/region.c +++ b/tcg/region.c @@ -393,6 +393,20 @@ bool tcg_region_alloc(TCGContext *s) static void tcg_region_initial_alloc__locked(TCGContext *s) { bool err = tcg_region_alloc__locked(s); + + /* + * Hotplugged vCPUs may initially fail to find even a single available + * region. This could be due to the TB cache being under stress from the + * existing vCPUs. To mitigate this, the TB cache should be flushed. + * Therefore, the region allocation failure should be ignored, and a flag + * set to mark `tb_flush()` as pending. The flush will be performed later, + * synchronously in the context of `cpu_exec_loop()`/`tb_gen_code()`. + */ + if (err && s->cpu && DEVICE(s->cpu)->hotplugged) { + s->tbflush_pend = true; + return; + } + g_assert(!err); } diff --git a/tcg/tcg.c b/tcg/tcg.c index 34e3056380..5e9c6b2b4b 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -783,12 +783,13 @@ void tcg_register_thread(void) tcg_ctx = &tcg_init_ctx; } #else -void tcg_register_thread(void) +void tcg_register_thread(CPUState *cpu) { TCGContext *s = g_malloc(sizeof(*s)); unsigned int i, n; *s = tcg_init_ctx; + s->cpu = cpu; /* Relink mem_base. 
*/ for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) { @@ -1388,6 +1389,16 @@ TranslationBlock *tcg_tb_alloc(TCGContext *s) TranslationBlock *tb; void *next; + /* + * The hotplugged vCPU's TCG context might not have any regions allocated. + * If this condition is detected, we should flush the TB cache to ensure + * that regions can be allocated for the newly hotplugged vCPU's TCGContext. + */ + if (s->tbflush_pend) { + s->tbflush_pend = false; + return NULL; + } + retry: tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align); next = (void *)ROUND_UP((uintptr_t)(tb + 1), align); -- 2.34.1