On Thu, Oct 27, 2022 at 04:18:56PM +0200, Ilya Leoshkevich wrote: > On Tue, Oct 04, 2022 at 12:52:36PM -0700, Richard Henderson wrote: > > Wrap the bare TranslationBlock pointer into a structure. > > > > Reviewed-by: Alex Bennée <alex.ben...@linaro.org> > > Reviewed-by: Philippe Mathieu-Daudé <f4...@amsat.org> > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > > --- > > accel/tcg/tb-hash.h | 1 + > > accel/tcg/tb-jmp-cache.h | 24 ++++++++++++++++++++++++ > > include/exec/cpu-common.h | 1 + > > include/hw/core/cpu.h | 15 +-------------- > > include/qemu/typedefs.h | 1 + > > accel/stubs/tcg-stub.c | 4 ++++ > > accel/tcg/cpu-exec.c | 10 +++++++--- > > accel/tcg/cputlb.c | 9 +++++---- > > accel/tcg/translate-all.c | 28 +++++++++++++++++++++++++--- > > hw/core/cpu-common.c | 3 +-- > > plugins/core.c | 2 +- > > trace/control-target.c | 2 +- > > 12 files changed, 72 insertions(+), 28 deletions(-) > > create mode 100644 accel/tcg/tb-jmp-cache.h > > Hi, > > After this patch, I get: > > qemu-s390x: qemu/include/qemu/rcu.h:102: rcu_read_unlock: Assertion > `p_rcu_reader->depth != 0' failed. > > in one of the wasmtime tests (host=x86_64, guest=s390x). > GDB shows that the root cause is actually this: > > Thread 181 "wasi_tokio::pat" received signal SIGSEGV, Segmentation fault. > [Switching to Thread 0x7ffff6c54640 (LWP 168352)] > 0x0000555555626736 in do_tb_phys_invalidate (tb=tb@entry=0x7fffea4b8500 > <code_gen_buffer+38503635>, rm_from_page_list=rm_from_page_list@entry=true) > at qemu/accel/tcg/translate-all.c:1192 > 1192 if (qatomic_read(&jc->array[h].tb) == tb) { > (gdb) bt > #0 0x0000555555626736 in do_tb_phys_invalidate > (tb=tb@entry=0x7fffea4b8500 <code_gen_buffer+38503635>, > rm_from_page_list=rm_from_page_list@entry=true) at > qemu/accel/tcg/translate-all.c:1192 > #1 0x0000555555626b98 in tb_phys_invalidate__locked (tb=0x7fffea4b8500 > <code_gen_buffer+38503635>) at qemu/accel/tcg/translate-all.c:1211 > #2 tb_invalidate_phys_page_range__locked (p=<optimized out>, > start=start@entry=836716683264, end=end@entry=836716687360, retaddr=0, > pages=0x0) at qemu/accel/tcg/translate-all.c:1678 > #3 0x0000555555626dfb in tb_invalidate_phys_range (start=836716683264, > start@entry=836716584960, end=end@entry=836716982272) at > qemu/accel/tcg/translate-all.c:1753 > #4 0x0000555555639e43 in target_munmap (start=start@entry=836716584960, > len=len@entry=397312) at qemu/linux-user/mmap.c:769 > > Let me know if you need more information, I can try to extract a > minimal reproducer. > > Best regards, > Ilya
Putting CPUJumpCache inside CPUState made problem go away: diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 18ca701b443..3ea528566c3 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -32,6 +32,7 @@ #include "qemu/thread.h" #include "qemu/plugin.h" #include "qom/object.h" +#include "accel/tcg/tb-jmp-cache.h" typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size, void *opaque); @@ -366,7 +367,7 @@ struct CPUState { CPUArchState *env_ptr; IcountDecr *icount_decr_ptr; - CPUJumpCache *tb_jmp_cache; + CPUJumpCache tb_jmp_cache; struct GDBRegisterState *gdb_regs; int gdb_num_regs; diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 2d7e610ee24..47165fc03e3 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -253,7 +253,7 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, tcg_debug_assert(!(cflags & CF_INVALID)); hash = tb_jmp_cache_hash_func(pc); - tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb); + tb = qatomic_rcu_read(&cpu->tb_jmp_cache.array[hash].tb); if (likely(tb && tb->pc == pc && @@ -267,7 +267,7 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc, if (tb == NULL) { return NULL; } - qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb); + qatomic_set(&cpu->tb_jmp_cache.array[hash].tb, tb); return tb; } @@ -998,7 +998,7 @@ int cpu_exec(CPUState *cpu) * for the fast lookup */ h = tb_jmp_cache_hash_func(pc); - qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb); + qatomic_set(&cpu->tb_jmp_cache.array[h].tb, tb); } #ifndef CONFIG_USER_ONLY diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 63ecc152366..fffd9cb15f8 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1188,7 +1188,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) /* remove the TB from the hash list */ h = tb_jmp_cache_hash_func(tb->pc); CPU_FOREACH(cpu) { - CPUJumpCache *jc = cpu->tb_jmp_cache; + CPUJumpCache *jc = &cpu->tb_jmp_cache; if (qatomic_read(&jc->array[h].tb) == tb) { qatomic_set(&jc->array[h].tb, NULL); } @@ -2445,23 +2445,12 @@ int page_unprotect(target_ulong address, uintptr_t pc) } #endif /* CONFIG_USER_ONLY */ -/* - * Called by generic code at e.g. cpu reset after cpu creation, - * therefore we must be prepared to allocate the jump cache. - */ void tcg_flush_jmp_cache(CPUState *cpu) { - CPUJumpCache *jc = cpu->tb_jmp_cache; + CPUJumpCache *jc = &cpu->tb_jmp_cache; - if (likely(jc)) { - for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { - qatomic_set(&jc->array[i].tb, NULL); - } - } else { - /* This should happen once during realize, and thus never race. */ - jc = g_new0(CPUJumpCache, 1); - jc = qatomic_xchg(&cpu->tb_jmp_cache, jc); - assert(jc == NULL); + for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { + qatomic_set(&jc->array[i].tb, NULL); } } So there must be a race in tcg_flush_jmp_cache() after all? Best regards, Ilya