This paves the way for a lockless tb_find_fast.

Signed-off-by: Emilio G. Cota <c...@braap.org>
---
 cpu-exec.c        |  8 +++++++-
 exec.c            |  2 ++
 include/qom/cpu.h | 15 +++++++++++++++
 qom/cpu.c         |  2 +-
 translate-all.c   | 32 +++++++++++++++++++++++++++++++-
 5 files changed, 56 insertions(+), 3 deletions(-)
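A note for reviewers unfamiliar with seqlocks: writers bump a sequence
counter around every update, and readers retry their lock-free read
whenever the counter tells them a writer was active in the meantime.
The following is a minimal sketch of that protocol, not QEMU's
qemu/seqlock.h implementation; the sketch_* names are made up for
illustration and the memory ordering is deliberately simplified.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

typedef struct {
    atomic_uint sequence;   /* odd while a write is in progress */
    pthread_mutex_t lock;   /* serializes writers */
} sketch_seqlock;

static void sketch_seqlock_init(sketch_seqlock *sl)
{
    atomic_init(&sl->sequence, 0);
    pthread_mutex_init(&sl->lock, NULL);
}

/* Writer side: take the mutex, then make the sequence odd. */
static void sketch_write_lock(sketch_seqlock *sl)
{
    pthread_mutex_lock(&sl->lock);
    atomic_fetch_add_explicit(&sl->sequence, 1, memory_order_release);
}

/* Make the sequence even again, then drop the mutex. */
static void sketch_write_unlock(sketch_seqlock *sl)
{
    atomic_fetch_add_explicit(&sl->sequence, 1, memory_order_release);
    pthread_mutex_unlock(&sl->lock);
}

/* Reader side: wait until no writer is active, then snapshot. */
static unsigned sketch_read_begin(sketch_seqlock *sl)
{
    unsigned seq;

    do {
        seq = atomic_load_explicit(&sl->sequence, memory_order_acquire);
    } while (seq & 1);  /* odd: writer in progress, spin */
    return seq;
}

/* True if a writer ran since read_begin; the reader must retry. */
static bool sketch_read_retry(sketch_seqlock *sl, unsigned start)
{
    return atomic_load_explicit(&sl->sequence, memory_order_acquire) != start;
}

(A production-quality seqlock also needs fences around the accesses to
the protected data themselves; those are omitted here for brevity.)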
diff --git a/cpu-exec.c b/cpu-exec.c
index f758928..5ad578d 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -334,7 +334,9 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
     }
 
     /* we add the TB in the virtual pc hash table */
+    seqlock_write_lock(&cpu->tb_jmp_cache_sequence);
     cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
+    seqlock_write_unlock(&cpu->tb_jmp_cache_sequence);
     return tb;
 }
 
@@ -343,13 +345,17 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu)
     CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
+    unsigned int version;
     int flags;
 
     /* we record a subset of the CPU state. It will
        always be the same before a given translated block
       is executed. */
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
-    tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
+    do {
+        version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence);
+        tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
+    } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version));
     if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
                  tb->flags != flags)) {
         tb = tb_find_slow(cpu, pc, cs_base, flags);
diff --git a/exec.c b/exec.c
index edf2236..ae6f416 100644
--- a/exec.c
+++ b/exec.c
@@ -577,6 +577,8 @@ void cpu_exec_init(CPUState *cpu, Error **errp)
     int cpu_index;
     Error *local_err = NULL;
 
+    qemu_mutex_init(&cpu->tb_jmp_cache_lock);
+    seqlock_init(&cpu->tb_jmp_cache_sequence, &cpu->tb_jmp_cache_lock);
 #ifndef CONFIG_USER_ONLY
     cpu->as = &address_space_memory;
     cpu->thread_id = qemu_get_thread_id();
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index dbe0438..f383c24 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -27,6 +27,7 @@
 #include "exec/hwaddr.h"
 #include "exec/memattrs.h"
 #include "qemu/queue.h"
+#include "qemu/seqlock.h"
 #include "qemu/thread.h"
 #include "qemu/typedefs.h"
 
@@ -287,6 +288,13 @@ struct CPUState {
     void *env_ptr; /* CPUArchState */
     struct TranslationBlock *current_tb;
+    /*
+     * The seqlock here is needed because not all updates are to a single
+     * entry; sometimes we want to atomically clear all entries that belong to
+     * a given page, e.g. when flushing said page.
+     */
+    QemuMutex tb_jmp_cache_lock;
+    QemuSeqLock tb_jmp_cache_sequence;
     struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
 
     struct GDBRegisterState *gdb_regs;
@@ -342,6 +350,13 @@ extern struct CPUTailQ cpus;
 
 extern __thread CPUState *current_cpu;
 
+static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
+{
+    seqlock_write_lock(&cpu->tb_jmp_cache_sequence);
+    memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
+    seqlock_write_unlock(&cpu->tb_jmp_cache_sequence);
+}
+
 /**
  * cpu_paging_enabled:
  * @cpu: The CPU whose state is to be inspected.
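The struct comment above is the key design point: per-entry atomic
accesses would suffice for the single-slot update in tb_find_slow, but
tb_flush_jmp_cache (further below) must clear a whole range of slots as
one atomic step. Under a seqlock that is trivial, as in this
hypothetical sketch built on the sketch_* helpers above
(sketch_cache_clear_range is made up for illustration):

#include <string.h>

/* Clear n consecutive slots; concurrent readers either retry or see
 * the range fully cleared, never a torn intermediate state. */
static void sketch_cache_clear_range(sketch_seqlock *sl, void **cache,
                                     unsigned first, unsigned n)
{
    sketch_write_lock(sl);
    memset(&cache[first], 0, n * sizeof(void *));
    sketch_write_unlock(sl);
}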
diff --git a/qom/cpu.c b/qom/cpu.c
index ac19710..5e72e7a 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -251,7 +251,7 @@ static void cpu_common_reset(CPUState *cpu)
     cpu->icount_decr.u32 = 0;
     cpu->can_do_io = 1;
     cpu->exception_index = -1;
-    memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
+    cpu_tb_jmp_cache_clear(cpu);
 }
 
 static bool cpu_common_has_work(CPUState *cs)
diff --git a/translate-all.c b/translate-all.c
index 76a0be8..668b43a 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -863,7 +863,7 @@ void tb_flush(CPUState *cpu)
     tcg_ctx.tb_ctx.nb_tbs = 0;
 
     CPU_FOREACH(cpu) {
-        memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+        cpu_tb_jmp_cache_clear(cpu);
     }
 
     memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
@@ -988,6 +988,27 @@ static inline void tb_jmp_remove(TranslationBlock *tb, int n)
     }
 }
 
+static inline void tb_jmp_cache_entry_clear(CPUState *cpu, TranslationBlock *tb)
+{
+    unsigned int version;
+    unsigned int h;
+    bool hit = false;
+
+    h = tb_jmp_cache_hash_func(tb->pc);
+    do {
+        version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence);
+        hit = cpu->tb_jmp_cache[h] == tb;
+    } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version));
+
+    if (hit) {
+        seqlock_write_lock(&cpu->tb_jmp_cache_sequence);
+        if (likely(cpu->tb_jmp_cache[h] == tb)) {
+            cpu->tb_jmp_cache[h] = NULL;
+        }
+        seqlock_write_unlock(&cpu->tb_jmp_cache_sequence);
+    }
+}
+
 /* invalidate one TB
  *
  * Called with tb_lock held.
@@ -1024,6 +1045,13 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
         invalidate_page_bitmap(p);
     }
 
+    tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
+
+    /* remove the TB from the hash list */
+    CPU_FOREACH(cpu) {
+        tb_jmp_cache_entry_clear(cpu, tb);
+    }
+
     /* suppress this TB from the two jump lists */
     tb_jmp_remove(tb, 0);
     tb_jmp_remove(tb, 1);
@@ -1707,12 +1735,14 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
     /* Discard jump cache entries for any tb which might potentially
        overlap the flushed page. */
     i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
+    seqlock_write_lock(&cpu->tb_jmp_cache_sequence);
     memset(&cpu->tb_jmp_cache[i], 0,
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
 
     i = tb_jmp_cache_hash_page(addr);
     memset(&cpu->tb_jmp_cache[i], 0,
            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
+    seqlock_write_unlock(&cpu->tb_jmp_cache_sequence);
 }
 
 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
-- 
1.9.1
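P.S.: worth noting is the double check in tb_jmp_cache_entry_clear()
above. The entry is first tested under the cheap read protocol, and the
write lock is taken only on a hit, so invalidating a TB does not
serialize every CPU through the mutex; the slot is then re-checked
under the lock, since it may have been overwritten between the read and
the write. In terms of the hypothetical sketch_* helpers above:

static void sketch_cache_entry_clear(sketch_seqlock *sl, void **cache,
                                     unsigned h, void *tb)
{
    unsigned seq;
    bool hit;

    /* Optimistic, lock-free check: most slots will not match. */
    do {
        seq = sketch_read_begin(sl);
        hit = cache[h] == tb;
    } while (sketch_read_retry(sl, seq));

    if (hit) {
        sketch_write_lock(sl);
        /* Re-check: the slot may have changed since the read. */
        if (cache[h] == tb) {
            cache[h] = NULL;
        }
        sketch_write_unlock(sl);
    }
}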