On 10/08/2015 17:27, fred.kon...@greensocs.com wrote: > diff --git a/cpu-exec.c b/cpu-exec.c > index f3358a9..a012e9d 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -131,6 +131,8 @@ static void init_delay_params(SyncClocks *sc, const > CPUState *cpu) > void cpu_loop_exit(CPUState *cpu) > { > cpu->current_tb = NULL; > + /* Release the mutex before the long jump so other threads can work. */ > + tb_lock_reset(); > siglongjmp(cpu->jmp_env, 1); > } > > @@ -143,6 +145,8 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc) > /* XXX: restore cpu registers saved in host registers */ > > cpu->exception_index = -1; > + /* Release the mutex before the long jump so other threads can work. */ > + tb_lock_reset(); > siglongjmp(cpu->jmp_env, 1); > } >
I think you should start easy and reuse the existing tb_lock code in cpu-exec.c: diff --git a/cpu-exec.c b/cpu-exec.c index 9305f03..2909ec2 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -307,7 +307,6 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, target_ulong pc, tb = tb_find_physical(cpu, pc, cs_base, flags); if (!tb) { - tb_lock(); /* * Retry to get the TB in case a CPU just translated it to avoid having * a duplicated TB in the pool. @@ -316,7 +315,6 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, target_ulong pc, if (!tb) { tb = tb_gen_code(cpu, pc, cs_base, flags, 0); } - tb_unlock(); } /* we add the TB in the virtual pc hash table */ cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; @@ -372,11 +372,6 @@ int cpu_exec(CPUState *cpu) uintptr_t next_tb; SyncClocks sc; - /* This must be volatile so it is not trashed by longjmp() */ -#if defined(CONFIG_USER_ONLY) - volatile bool have_tb_lock = false; -#endif - if (cpu->halted) { if (!cpu_has_work(cpu)) { return EXCP_HALTED; @@ -480,10 +475,7 @@ int cpu_exec(CPUState *cpu) cpu->exception_index = EXCP_INTERRUPT; cpu_loop_exit(cpu); } -#if defined(CONFIG_USER_ONLY) - qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); - have_tb_lock = true; -#endif + tb_lock(); tb = tb_find_fast(cpu); /* Note: we do it here to avoid a gcc bug on Mac OS X when doing it in tb_find_slow */ @@ -505,10 +497,7 @@ int cpu_exec(CPUState *cpu) tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK), next_tb & TB_EXIT_MASK, tb); } -#if defined(CONFIG_USER_ONLY) - have_tb_lock = false; - qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); -#endif + tb_unlock(); /* cpu_interrupt might be called while translating the TB, but before it is linked into a potentially infinite loop and becomes env->current_tb. 
Avoid @@ -575,12 +564,7 @@ int cpu_exec(CPUState *cpu) x86_cpu = X86_CPU(cpu); env = &x86_cpu->env; #endif -#if defined(CONFIG_USER_ONLY) - if (have_tb_lock) { - qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); - have_tb_lock = false; - } -#endif + tb_lock_reset(); } } /* for(;;) */ Optimizations should then come on top. > diff --git a/target-arm/translate.c b/target-arm/translate.c > index 69ac18c..960c75e 100644 > --- a/target-arm/translate.c > +++ b/target-arm/translate.c > @@ -11166,6 +11166,8 @@ static inline void > gen_intermediate_code_internal(ARMCPU *cpu, > > dc->tb = tb; > > + tb_lock(); This locks twice, I think? Both cpu_restore_state_from_tb and tb_gen_code (which calls cpu_gen_code) take the lock. How does it work? > + > dc->is_jmp = DISAS_NEXT; > dc->pc = pc_start; > dc->singlestep_enabled = cs->singlestep_enabled; > @@ -11506,6 +11508,7 @@ done_generating: > tb->size = dc->pc - pc_start; > tb->icount = num_insns; > } > + tb_unlock(); > } > > +/* tb_lock must be help for tcg_malloc_internal. */ "Held", not "help". Paolo