On 23/08/2015 17:23, Emilio G. Cota wrote: > This paves the way for a lockless tb_find_fast. > > Signed-off-by: Emilio G. Cota <c...@braap.org> > --- > cpu-exec.c | 8 +++++++- > exec.c | 2 ++ > include/qom/cpu.h | 15 +++++++++++++++ > qom/cpu.c | 2 +- > translate-all.c | 32 +++++++++++++++++++++++++++++++- > 5 files changed, 56 insertions(+), 3 deletions(-) > > diff --git a/cpu-exec.c b/cpu-exec.c > index f758928..5ad578d 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -334,7 +334,9 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, > } > > /* we add the TB in the virtual pc hash table */ > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > return tb; > } > > @@ -343,13 +345,17 @@ static inline TranslationBlock *tb_find_fast(CPUState > *cpu) > CPUArchState *env = (CPUArchState *)cpu->env_ptr; > TranslationBlock *tb; > target_ulong cs_base, pc; > + unsigned int version; > int flags; > > /* we record a subset of the CPU state. It will > always be the same before a given translated block > is executed. */ > cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > - tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; > + do { > + version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence); > + tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; > + } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version)); > if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || > tb->flags != flags)) { > tb = tb_find_slow(cpu, pc, cs_base, flags); > diff --git a/exec.c b/exec.c > index edf2236..ae6f416 100644 > --- a/exec.c > +++ b/exec.c > @@ -577,6 +577,8 @@ void cpu_exec_init(CPUState *cpu, Error **errp) > int cpu_index; > Error *local_err = NULL; > > + qemu_mutex_init(&cpu->tb_jmp_cache_lock); > + seqlock_init(&cpu->tb_jmp_cache_sequence, &cpu->tb_jmp_cache_lock); > #ifndef CONFIG_USER_ONLY > cpu->as = &address_space_memory; > cpu->thread_id = qemu_get_thread_id(); > diff --git a/include/qom/cpu.h b/include/qom/cpu.h > index dbe0438..f383c24 100644 > --- a/include/qom/cpu.h > +++ b/include/qom/cpu.h > @@ -27,6 +27,7 @@ > #include "exec/hwaddr.h" > #include "exec/memattrs.h" > #include "qemu/queue.h" > +#include "qemu/seqlock.h" > #include "qemu/thread.h" > #include "qemu/typedefs.h" > > @@ -287,6 +288,13 @@ struct CPUState { > > void *env_ptr; /* CPUArchState */ > struct TranslationBlock *current_tb; > + /* > + * The seqlock here is needed because not all updates are to a single > + * entry; sometimes we want to atomically clear all entries that belong > to > + * a given page, e.g. when flushing said page. > + */ > + QemuMutex tb_jmp_cache_lock; > + QemuSeqLock tb_jmp_cache_sequence; > struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; > > struct GDBRegisterState *gdb_regs; > @@ -342,6 +350,13 @@ extern struct CPUTailQ cpus; > > extern __thread CPUState *current_cpu; > > +static inline void cpu_tb_jmp_cache_clear(CPUState *cpu) > +{ > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > + memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > +} > + > /** > * cpu_paging_enabled: > * @cpu: The CPU whose state is to be inspected. > diff --git a/qom/cpu.c b/qom/cpu.c > index ac19710..5e72e7a 100644 > --- a/qom/cpu.c > +++ b/qom/cpu.c > @@ -251,7 +251,7 @@ static void cpu_common_reset(CPUState *cpu) > cpu->icount_decr.u32 = 0; > cpu->can_do_io = 1; > cpu->exception_index = -1; > - memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); > + cpu_tb_jmp_cache_clear(cpu); > } > > static bool cpu_common_has_work(CPUState *cs) > diff --git a/translate-all.c b/translate-all.c > index 76a0be8..668b43a 100644 > --- a/translate-all.c > +++ b/translate-all.c > @@ -863,7 +863,7 @@ void tb_flush(CPUState *cpu) > tcg_ctx.tb_ctx.nb_tbs = 0; > > CPU_FOREACH(cpu) { > - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); > + cpu_tb_jmp_cache_clear(cpu); > } > > memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, > sizeof(tcg_ctx.tb_ctx.tb_phys_hash)); > @@ -988,6 +988,27 @@ static inline void tb_jmp_remove(TranslationBlock *tb, > int n) > } > } > > +static inline void tb_jmp_cache_entry_clear(CPUState *cpu, TranslationBlock > *tb) > +{ > + unsigned int version; > + unsigned int h; > + bool hit = false; > + > + h = tb_jmp_cache_hash_func(tb->pc); > + do { > + version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence); > + hit = cpu->tb_jmp_cache[h] == tb; > + } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version)); > + > + if (hit) { > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > + if (likely(cpu->tb_jmp_cache[h] == tb)) { > + cpu->tb_jmp_cache[h] = NULL; > + } > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > + } > +} > + > /* invalidate one TB > * > * Called with tb_lock held. > @@ -1024,6 +1045,13 @@ void tb_phys_invalidate(TranslationBlock *tb, > tb_page_addr_t page_addr) > invalidate_page_bitmap(p); > } > > + tcg_ctx.tb_ctx.tb_invalidated_flag = 1; > + > + /* remove the TB from the hash list */ > + CPU_FOREACH(cpu) { > + tb_jmp_cache_entry_clear(cpu, tb); > + } > + > /* suppress this TB from the two jump lists */ > tb_jmp_remove(tb, 0); > tb_jmp_remove(tb, 1); > @@ -1707,12 +1735,14 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong > addr) > /* Discard jump cache entries for any tb which might potentially > overlap the flushed page. */ > i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE); > + seqlock_write_lock(&cpu->tb_jmp_cache_sequence); > memset(&cpu->tb_jmp_cache[i], 0, > TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); > > i = tb_jmp_cache_hash_page(addr); > memset(&cpu->tb_jmp_cache[i], 0, > TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); > + seqlock_write_unlock(&cpu->tb_jmp_cache_sequence); > } > > void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) >
I'm not sure how the last three patches compare with the existing "tcg: move tb_find_fast outside the tb_lock critical section"? Paolo