From: KONRAD Frederic <fred.kon...@greensocs.com>

This protects TBContext with tb_lock to make tb_* thread safe.

We can still have an issue with tb_flush in the multithreaded TCG case:
another CPU can be executing code during a flush. This can be fixed later by
making all the other TCG threads exit before calling tb_flush().

tb_find_slow is split into tb_find_slow and tb_find_physical, as the whole of
tb_find_slow does not require the tb lock.

Signed-off-by: KONRAD Frederic <fred.kon...@greensocs.com>

Changes:
  V1 -> V2:
    * Drop the tb_lock around tb_find_fast in cpu-exec.c.
---
 cpu-exec.c             |  60 ++++++++++++++--------
 target-arm/translate.c |   5 ++
 tcg/tcg.h              |   7 +++
 translate-all.c        | 137 ++++++++++++++++++++++++++++++++++++++-----------
 4 files changed, 158 insertions(+), 51 deletions(-)
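Note on the cpu-exec.c change below: the new lookup is the classic
check-lock-recheck shape. A condensed sketch of that shape, where lookup()
and generate() are hypothetical stand-ins for tb_find_physical() and
tb_gen_code():

    void *lookup(unsigned long pc);      /* hypothetical */
    void *generate(unsigned long pc);    /* hypothetical */

    /* Probe without the lock first; only lock (and re-probe) on a miss. */
    static void *find_or_generate(unsigned long pc)
    {
        void *tb = lookup(pc);           /* unlocked fast path */

        if (!tb) {
            tb_lock();
            tb = lookup(pc);             /* re-check: another thread may have
                                            generated it while we waited */
            if (!tb) {
                tb = generate(pc);       /* translate under the lock */
            }
            tb_unlock();
        }
        return tb;
    }

The unlocked first probe keeps the mutex off the hot path; the second probe
under the lock is what prevents two threads from translating the same TB
twice.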
diff --git a/cpu-exec.c b/cpu-exec.c
index d6336d9..5d9b518 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -130,6 +130,8 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
 void cpu_loop_exit(CPUState *cpu)
 {
     cpu->current_tb = NULL;
+    /* Release the mutex before the long jump so other threads can work. */
+    tb_lock_reset();
     siglongjmp(cpu->jmp_env, 1);
 }
 
@@ -142,6 +144,8 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc)
     /* XXX: restore cpu registers saved in host registers */
 
     cpu->exception_index = -1;
+    /* Release the mutex before the long jump so other threads can work. */
+    tb_lock_reset();
     siglongjmp(cpu->jmp_env, 1);
 }
 
@@ -253,12 +257,9 @@ static void cpu_exec_nocache(CPUArchState *env, int max_cycles,
     tb_free(tb);
 }
 
-static TranslationBlock *tb_find_slow(CPUArchState *env,
-                                      target_ulong pc,
-                                      target_ulong cs_base,
-                                      uint64_t flags)
+static TranslationBlock *tb_find_physical(CPUArchState *env, target_ulong pc,
+                                          target_ulong cs_base, uint64_t flags)
 {
-    CPUState *cpu = ENV_GET_CPU(env);
     TranslationBlock *tb, **ptb1;
     unsigned int h;
     tb_page_addr_t phys_pc, phys_page1;
@@ -273,8 +274,9 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
     ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
     for(;;) {
         tb = *ptb1;
-        if (!tb)
-            goto not_found;
+        if (!tb) {
+            return tb;
+        }
         if (tb->pc == pc &&
             tb->page_addr[0] == phys_page1 &&
             tb->cs_base == cs_base &&
@@ -282,28 +284,43 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
             /* check next page if needed */
             if (tb->page_addr[1] != -1) {
                 tb_page_addr_t phys_page2;
-
                 virt_page2 = (pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
                 phys_page2 = get_page_addr_code(env, virt_page2);
-                if (tb->page_addr[1] == phys_page2)
-                    goto found;
+                if (tb->page_addr[1] == phys_page2) {
+                    return tb;
+                }
             } else {
-                goto found;
+                return tb;
             }
         }
         ptb1 = &tb->phys_hash_next;
     }
-
- not_found:
-    /* if no translated code available, then translate it now */
-    tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
-
- found:
-    /* Move the last found TB to the head of the list */
-    if (likely(*ptb1)) {
-        *ptb1 = tb->phys_hash_next;
-        tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
-        tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
+
+    return tb;
+}
+
+static TranslationBlock *tb_find_slow(CPUArchState *env, target_ulong pc,
+                                      target_ulong cs_base, uint64_t flags)
+{
+    /*
+     * First try to get the tb; if we don't find it, we need to lock and
+     * compile it.
+     */
+    CPUState *cpu = ENV_GET_CPU(env);
+    TranslationBlock *tb;
+
+    tb = tb_find_physical(env, pc, cs_base, flags);
+    if (!tb) {
+        tb_lock();
+        /*
+         * Retry to fetch the TB in case another CPU just translated it, so
+         * that we don't end up with a duplicated TB in the pool.
+         */
+        tb = tb_find_physical(env, pc, cs_base, flags);
+        if (!tb) {
+            tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
+        }
+        tb_unlock();
     }
     /* we add the TB in the virtual pc hash table */
     cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
@@ -326,6 +343,7 @@ static inline TranslationBlock *tb_find_fast(CPUArchState *env)
                  tb->flags != flags)) {
         tb = tb_find_slow(env, pc, cs_base, flags);
     }
+
     return tb;
 }
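Note on the tb_lock_reset() calls above: siglongjmp() skips every
tb_unlock() on the frames it unwinds, so the jump target would otherwise
inherit a locked mutex. A minimal sketch of that situation, using the
tb_lock API added by this patch (exec_loop() and loop_exit() are
hypothetical stand-ins for QEMU's execution loop):

    #include <setjmp.h>

    static sigjmp_buf jmp_env;

    static void loop_exit(void)
    {
        /* Force the lock free whatever the current nesting depth:
         * the unwound frames never get to run their tb_unlock(). */
        tb_lock_reset();
        siglongjmp(jmp_env, 1);
    }

    static void exec_loop(void)
    {
        if (sigsetjmp(jmp_env, 0) == 0) {
            tb_lock();
            /* ... code here may call loop_exit() from deep inside ... */
            tb_unlock();
        }
        /* After the jump we arrive here with the mutex released. */
    }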
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 971b6db..47345aa 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -11162,6 +11162,8 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
 
     dc->tb = tb;
 
+    tb_lock();
+
     dc->is_jmp = DISAS_NEXT;
     dc->pc = pc_start;
     dc->singlestep_enabled = cs->singlestep_enabled;
@@ -11499,6 +11501,7 @@ done_generating:
         tb->size = dc->pc - pc_start;
         tb->icount = num_insns;
     }
+    tb_unlock();
 }
 
 void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
@@ -11567,6 +11570,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
 
 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
 {
+    tb_lock();
     if (is_a64(env)) {
         env->pc = tcg_ctx.gen_opc_pc[pc_pos];
         env->condexec_bits = 0;
@@ -11574,4 +11578,5 @@ void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
         env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
         env->condexec_bits = gen_opc_condexec_bits[pc_pos];
     }
+    tb_unlock();
 }
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 41e4869..032fe10 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -592,17 +592,24 @@ void *tcg_malloc_internal(TCGContext *s, int size);
 void tcg_pool_reset(TCGContext *s);
 void tcg_pool_delete(TCGContext *s);
 
+void tb_lock(void);
+void tb_unlock(void);
+void tb_lock_reset(void);
+
 static inline void *tcg_malloc(int size)
 {
     TCGContext *s = &tcg_ctx;
     uint8_t *ptr, *ptr_end;
+    tb_lock();
     size = (size + sizeof(long) - 1) & ~(sizeof(long) - 1);
     ptr = s->pool_cur;
     ptr_end = ptr + size;
     if (unlikely(ptr_end > s->pool_end)) {
+        tb_unlock();
         return tcg_malloc_internal(&tcg_ctx, size);
     } else {
         s->pool_cur = ptr_end;
+        tb_unlock();
         return ptr;
     }
 }
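Note on the tcg_malloc() hunk above: it is a bump allocator bracketed by the
lock, and both exits release it. The same shape standalone (struct pool and
pool_alloc_slow() are hypothetical stand-ins for TCGContext's pool fields and
tcg_malloc_internal(); tb_lock()/tb_unlock() are the patch's API):

    #include <stddef.h>
    #include <stdint.h>

    struct pool {
        uint8_t *cur;               /* next free byte, like pool_cur  */
        uint8_t *end;               /* end of the chunk, like pool_end */
    };

    void *pool_alloc_slow(struct pool *p, size_t size);  /* hypothetical */

    void *pool_alloc(struct pool *p, size_t size)
    {
        uint8_t *ptr;

        tb_lock();
        /* round the size up to long alignment, as tcg_malloc() does */
        size = (size + sizeof(long) - 1) & ~(sizeof(long) - 1);
        ptr = p->cur;
        if (ptr + size > p->end) {
            tb_unlock();            /* release before the refill path */
            return pool_alloc_slow(p, size);
        }
        p->cur = ptr + size;
        tb_unlock();
        return ptr;
    }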
diff --git a/translate-all.c b/translate-all.c
index b6b0e1c..c25b79b 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -127,6 +127,34 @@ static void *l1_map[V_L1_SIZE];
 /* code generation context */
 TCGContext tcg_ctx;
 
+/* translation block context */
+__thread volatile int have_tb_lock;
+
+void tb_lock(void)
+{
+    if (!have_tb_lock) {
+        qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+    }
+    have_tb_lock++;
+}
+
+void tb_unlock(void)
+{
+    assert(have_tb_lock > 0);
+    have_tb_lock--;
+    if (!have_tb_lock) {
+        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+    }
+}
+
+void tb_lock_reset(void)
+{
+    if (have_tb_lock) {
+        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+    }
+    have_tb_lock = 0;
+}
+
 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                          tb_page_addr_t phys_page2);
 static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
@@ -215,6 +243,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
 #ifdef CONFIG_PROFILER
     ti = profile_getclock();
 #endif
+    tb_lock();
 
     tcg_func_start(s);
     gen_intermediate_code_pc(env, tb);
@@ -228,8 +257,10 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
 
     /* find opc index corresponding to search_pc */
     tc_ptr = (uintptr_t)tb->tc_ptr;
-    if (searched_pc < tc_ptr)
+    if (searched_pc < tc_ptr) {
+        tb_unlock();
         return -1;
+    }
 
     s->tb_next_offset = tb->tb_next_offset;
 #ifdef USE_DIRECT_JUMP
@@ -241,8 +272,10 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
 #endif
     j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr, searched_pc - tc_ptr);
-    if (j < 0)
+    if (j < 0) {
+        tb_unlock();
         return -1;
+    }
     /* now find start of instruction before */
     while (s->gen_opc_instr_start[j] == 0) {
         j--;
@@ -255,6 +288,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     s->restore_time += profile_getclock() - ti;
     s->restore_count++;
 #endif
+
+    tb_unlock();
     return 0;
 }
 
@@ -672,6 +707,7 @@ static inline void code_gen_alloc(size_t tb_size)
             CODE_GEN_AVG_BLOCK_SIZE;
     tcg_ctx.tb_ctx.tbs = g_malloc(tcg_ctx.code_gen_max_blocks *
                                   sizeof(TranslationBlock));
+    qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -696,16 +732,22 @@ bool tcg_enabled(void)
     return tcg_ctx.code_gen_buffer != NULL;
 }
 
-/* Allocate a new translation block. Flush the translation buffer if
-   too many translation blocks or too much generated code. */
+/*
+ * Allocate a new translation block. Flush the translation buffer if
+ * too many translation blocks or too much generated code.
+ * tb_alloc is not thread safe but tb_gen_code is protected by a mutex so this
+ * function is called only by one thread.
+ */
 static TranslationBlock *tb_alloc(target_ulong pc)
 {
-    TranslationBlock *tb;
+    TranslationBlock *tb = NULL;
 
     if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
         (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
          tcg_ctx.code_gen_buffer_max_size) {
-        return NULL;
+        tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
+        tb->pc = pc;
+        tb->cflags = 0;
     }
     tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
     tb->pc = pc;
@@ -718,11 +760,16 @@ void tb_free(TranslationBlock *tb)
     /* In practice this is mostly used for single use temporary TB
        Ignore the hard cases and just back up if this TB happens to be
        the last one generated.  */
+
+    tb_lock();
+
     if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
         tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
         tcg_ctx.code_gen_ptr = tb->tc_ptr;
         tcg_ctx.tb_ctx.nb_tbs--;
     }
+
+    tb_unlock();
 }
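Note on tb_lock() above: have_tb_lock is a per-thread recursion counter, so
nested acquisitions never touch the QemuMutex twice. That is what lets
tb_gen_code() (which takes the lock) call tb_link_page() (which takes it
again, further down). Sketched:

    /* Only the outermost pair actually operates on the mutex. */
    void nested_lock_example(void)
    {
        tb_lock();      /* depth 0 -> 1: qemu_mutex_lock() runs    */
        tb_lock();      /* depth 1 -> 2: mutex already held, no-op */
        tb_unlock();    /* depth 2 -> 1: mutex stays held          */
        tb_unlock();    /* depth 1 -> 0: qemu_mutex_unlock() runs  */
    }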
@@ -773,6 +820,8 @@ void tb_flush(CPUArchState *env1)
 {
     CPUState *cpu = ENV_GET_CPU(env1);
 
+    tb_lock();
+
 #if defined(DEBUG_FLUSH)
     printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
            (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
@@ -797,6 +846,8 @@ void tb_flush(CPUArchState *env1)
     /* XXX: flush processor icache at this point if cache flush is
        expensive */
     tcg_ctx.tb_ctx.tb_flush_count++;
+
+    tb_unlock();
 }
 
 #ifdef DEBUG_TB_CHECK
@@ -806,6 +857,8 @@ static void tb_invalidate_check(target_ulong address)
     TranslationBlock *tb;
     int i;
 
+    tb_lock();
+
     address &= TARGET_PAGE_MASK;
     for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
         for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
@@ -817,6 +870,8 @@ static void tb_invalidate_check(target_ulong address)
             }
         }
     }
+
+    tb_unlock();
 }
 
 /* verify that all the pages have correct rights for code */
@@ -825,6 +880,8 @@ static void tb_page_check(void)
     TranslationBlock *tb;
     int i, flags1, flags2;
 
+    tb_lock();
+
     for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
         for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
                 tb = tb->phys_hash_next) {
@@ -836,6 +893,8 @@ static void tb_page_check(void)
             }
         }
     }
+
+    tb_unlock();
 }
 
 #endif
@@ -916,6 +975,8 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     tb_page_addr_t phys_pc;
     TranslationBlock *tb1, *tb2;
 
+    tb_lock();
+
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
     h = tb_phys_hash_func(phys_pc);
@@ -963,6 +1024,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
 
     tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
+    tb_unlock();
 }
 
 static void build_page_bitmap(PageDesc *p)
@@ -1004,6 +1066,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     target_ulong virt_page2;
     int code_gen_size;
 
+    tb_lock();
+
     phys_pc = get_page_addr_code(env, pc);
     if (use_icount) {
         cflags |= CF_USE_ICOUNT;
@@ -1032,6 +1096,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         phys_page2 = get_page_addr_code(env, virt_page2);
     }
     tb_link_page(tb, phys_pc, phys_page2);
+
+    tb_unlock();
     return tb;
 }
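Note on the tb_link_page() hunk that follows: the two locks nest in a fixed
order, tb_lock first and mmap_lock second, and are released in reverse.
Keeping one global order is what rules out an ABBA deadlock between two
threads that each need both locks. Sketched:

    void lock_order_example(void)
    {
        tb_lock();              /* always first  */
        mmap_lock();            /* always second */
        /* ... update the physical-page hash and page tables ... */
        mmap_unlock();          /* release in reverse order */
        tb_unlock();
    }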
@@ -1330,13 +1396,15 @@ static inline void tb_alloc_page(TranslationBlock *tb,
 }
 
 /* add a new TB and link it to the physical page tables. phys_page2 is
-   (-1) to indicate that only one page contains the TB. */
+ * (-1) to indicate that only one page contains the TB. */
 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                          tb_page_addr_t phys_page2)
 {
     unsigned int h;
     TranslationBlock **ptb;
 
+    tb_lock();
+
     /* Grab the mmap lock to stop another thread invalidating this TB
        before we are done.  */
     mmap_lock();
@@ -1370,6 +1438,8 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     tb_page_check();
 #endif
     mmap_unlock();
+
+    tb_unlock();
 }
 
 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
@@ -1378,31 +1448,34 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
 {
     int m_min, m_max, m;
     uintptr_t v;
-    TranslationBlock *tb;
-
-    if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
-        return NULL;
-    }
-    if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
-        tc_ptr >= (uintptr_t)tcg_ctx.code_gen_ptr) {
-        return NULL;
-    }
-    /* binary search (cf Knuth) */
-    m_min = 0;
-    m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
-    while (m_min <= m_max) {
-        m = (m_min + m_max) >> 1;
-        tb = &tcg_ctx.tb_ctx.tbs[m];
-        v = (uintptr_t)tb->tc_ptr;
-        if (v == tc_ptr) {
-            return tb;
-        } else if (tc_ptr < v) {
-            m_max = m - 1;
-        } else {
-            m_min = m + 1;
+    TranslationBlock *tb = NULL;
+
+    tb_lock();
+
+    if ((tcg_ctx.tb_ctx.nb_tbs > 0)
+        && (tc_ptr >= (uintptr_t)tcg_ctx.code_gen_buffer &&
+            tc_ptr < (uintptr_t)tcg_ctx.code_gen_ptr)) {
+        /* binary search (cf Knuth) */
+        m_min = 0;
+        m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
+        while (m_min <= m_max) {
+            m = (m_min + m_max) >> 1;
+            tb = &tcg_ctx.tb_ctx.tbs[m];
+            v = (uintptr_t)tb->tc_ptr;
+            if (v == tc_ptr) {
+                tb_unlock();
+                return tb;
+            } else if (tc_ptr < v) {
+                m_max = m - 1;
+            } else {
+                m_min = m + 1;
+            }
         }
+        tb = &tcg_ctx.tb_ctx.tbs[m_max];
     }
-    return &tcg_ctx.tb_ctx.tbs[m_max];
+
+    tb_unlock();
+    return tb;
 }
 
 #if !defined(CONFIG_USER_ONLY)
@@ -1564,6 +1637,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     int direct_jmp_count, direct_jmp2_count, cross_page;
     TranslationBlock *tb;
 
+    tb_lock();
+
     target_code_size = 0;
     max_target_code_size = 0;
     cross_page = 0;
@@ -1619,6 +1694,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
            tcg_ctx.tb_ctx.tb_phys_invalidate_count);
     cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
     tcg_dump_info(f, cpu_fprintf);
+
+    tb_unlock();
 }
 
 void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
-- 
1.9.0