This will allow us to safely look up TBs without taking any locks. Note, however, that tb_lock protects the valid field, so whenever chaining is an option we still have to acquire the lock.
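In outline, the exec loop then behaves as follows (a sketch of the code
touched below, not a verbatim excerpt):

    /* The lookup itself takes no locks; a TB that is concurrently
     * invalidated may still be returned here, which is tolerated.
     */
    tb = tb_find_fast(cpu);

    /* Chaining patches the jump lists and reads the valid field, both
     * of which are protected by tb_lock, so take the lock and re-check
     * validity before patching in the direct jump.
     */
    tb_lock_recursive();
    next = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
    if (tb->valid && next->valid) {
        tb_add_jump(next, next_tb & TB_EXIT_MASK, tb);
    }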
Signed-off-by: Emilio G. Cota <c...@braap.org>
---
 cpu-exec.c              | 23 +++++++---------------
 include/exec/exec-all.h |  3 +--
 translate-all.c         | 51 +++++++++++++++++--------------------------------
 3 files changed, 25 insertions(+), 52 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index 5ad578d..826ec25 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -239,9 +239,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
     tb_lock();
     tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
                      max_cycles | CF_NOCACHE);
-    tb->orig_tb = (atomic_mb_read(&tcg_ctx.tb_ctx.tb_invalidated_flag)
-                   ? NULL
-                   : orig_tb);
+    tb->orig_tb = orig_tb->valid ? orig_tb : NULL;
     cpu->current_tb = tb;
     tb_unlock();
 
@@ -268,8 +266,6 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
     tb_page_addr_t phys_pc, phys_page1;
     target_ulong virt_page2;
 
-    atomic_mb_set(&tcg_ctx.tb_ctx.tb_invalidated_flag, 0);
-
     /* find translated block using physical mappings */
     phys_pc = get_page_addr_code(env, pc);
     phys_page1 = phys_pc & TARGET_PAGE_MASK;
@@ -536,15 +532,6 @@ int cpu_exec(CPUState *cpu)
                     cpu_loop_exit(cpu);
                 }
                 tb = tb_find_fast(cpu);
-                /* Note: we do it here to avoid a gcc bug on Mac OS X when
-                   doing it in tb_find_slow */
-                if (atomic_mb_read(&tcg_ctx.tb_ctx.tb_invalidated_flag)) {
-                    /* as some TB could have been invalidated because
-                       of memory exceptions while generating the code, we
-                       must recompute the hash index here */
-                    next_tb = 0;
-                    atomic_mb_set(&tcg_ctx.tb_ctx.tb_invalidated_flag, 0);
-                }
                 if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
                     qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
                              tb->tc_ptr, tb->pc, lookup_symbol(tb->pc));
@@ -553,9 +540,13 @@ int cpu_exec(CPUState *cpu)
                    spans two pages, we cannot safely do a direct
                    jump. */
                 if (next_tb != 0 && tb->page_addr[1] == -1) {
+                    TranslationBlock *next;
+
                     tb_lock_recursive();
-                    tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
-                                next_tb & TB_EXIT_MASK, tb);
+                    next = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
+                    if (tb->valid && next->valid) {
+                        tb_add_jump(next, next_tb & TB_EXIT_MASK, tb);
+                    }
                 }
                 /* The lock may not be taken if we went through the
                  * fast lookup path and did not have to do any patching.
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 3b8399a..7e4aea7 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -178,6 +178,7 @@ struct TranslationBlock {
        jmp_first */
     struct TranslationBlock *jmp_next[2];
     struct TranslationBlock *jmp_first;
+    bool valid; /* protected by tb_lock */
 };
 
 #include "qemu/thread.h"
@@ -195,8 +196,6 @@ struct TBContext {
     /* statistics */
     int tb_flush_count;
     int tb_phys_invalidate_count;
-
-    int tb_invalidated_flag;
 };
 
 void tb_free(TranslationBlock *tb);
diff --git a/translate-all.c b/translate-all.c
index 668b43a..94adcd0 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -791,6 +791,17 @@ static inline void invalidate_page_bitmap(PageDesc *p)
 #endif
 }
 
+static void tb_invalidate_all(void)
+{
+    int i;
+
+    for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
+        TranslationBlock *tb = &tcg_ctx.tb_ctx.tbs[i];
+
+        tb->valid = false;
+    }
+}
+
 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
 static void page_flush_tb_1(int level, void **lp)
 {
@@ -866,6 +877,7 @@ void tb_flush(CPUState *cpu)
         cpu_tb_jmp_cache_clear(cpu);
     }
 
+    tb_invalidate_all();
     memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
     page_flush_tb();
 
@@ -1021,11 +1033,6 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     tb_page_addr_t phys_pc;
     TranslationBlock *tb1, *tb2;
 
-    /* Set the invalidated_flag first, to block patching a
-     * jump to tb. FIXME: invalidated_flag should be per TB.
-     */
-    atomic_mb_set(&tcg_ctx.tb_ctx.tb_invalidated_flag, 1);
-
     /* Now remove the TB from the hash list, so that tb_find_slow
      * cannot find it anymore.
      */
@@ -1045,8 +1052,6 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
         invalidate_page_bitmap(p);
     }
 
-    tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
-
     /* remove the TB from the hash list */
     CPU_FOREACH(cpu) {
         tb_jmp_cache_entry_clear(cpu, tb);
     }
@@ -1070,33 +1075,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     }
     tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
 
-#if 0
-    /* TODO: I think this barrier is not necessary. On the
-     * cpu_exec side, it is okay if the read from tb_jmp_cache
-     * comes after the read from tb_phys_hash. This is because
-     * the read would be bleeding into the tb_lock critical
-     * section, hence there cannot be any concurrent tb_invalidate.
-     * And if you don't need a barrier there, you shouldn't need
-     * one here, either.
-     */
-    smp_wmb();
-#endif
-
-    /* Finally, remove the TB from the per-CPU cache that is
-     * accessed without tb_lock. The tb can still be executed
-     * once after returning, if the cache was accessed before
-     * this point, but that's it.
-     *
-     * The cache cannot be filled with this tb anymore, because
-     * the lists are accessed with tb_lock held.
-     */
-    h = tb_jmp_cache_hash_func(tb->pc);
-    CPU_FOREACH(cpu) {
-        if (cpu->tb_jmp_cache[h] == tb) {
-            cpu->tb_jmp_cache[h] = NULL;
-        }
-    }
-
+    tb->valid = false;
     tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
 }
 
@@ -1157,12 +1136,16 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         tb_flush_safe(cpu);
 #endif
         cpu_loop_exit(cpu);
+        tb_flush(cpu);
+        /* cannot fail at this point */
+        tb = tb_alloc(pc);
     }
     tb->tc_ptr = tcg_ctx.code_gen_ptr;
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
+    tb->valid = true;
     cpu_gen_code(env, tb, &code_gen_size);
     tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)tcg_ctx.code_gen_ptr +
                                      code_gen_size + CODE_GEN_ALIGN - 1) &
                                     ~(CODE_GEN_ALIGN - 1));
-- 
1.9.1