On 15/04/16 17:23, Alex Bennée wrote:
> diff --git a/cputlb.c b/cputlb.c
> index 1412049..42a3b07 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -56,22 +56,14 @@
>      } \
>  } while (0)
>
> +/* We need a solution for stuffing 64 bit pointers in 32 bit ones if
> + * we care about this combination */
> +QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(void *));
> +
>  /* statistics */
>  int tlb_flush_count;
>
> -/* NOTE:
> - * If flush_global is true (the usual case), flush all tlb entries.
> - * If flush_global is false, flush (at least) all tlb entries not
> - * marked global.
> - *
> - * Since QEMU doesn't currently implement a global/not-global flag
> - * for tlb entries, at the moment tlb_flush() will also flush all
> - * tlb entries in the flush_global == false case. This is OK because
> - * CPU architectures generally permit an implementation to drop
> - * entries from the TLB at any time, so flushing more entries than
> - * required is only an efficiency issue, not a correctness issue.
> - */
> -void tlb_flush(CPUState *cpu, int flush_global)
> +static void tlb_flush_nocheck(CPUState *cpu, int flush_global)
>  {
>      CPUArchState *env = cpu->env_ptr;
>
> @@ -89,6 +81,34 @@ void tlb_flush(CPUState *cpu, int flush_global)
>      env->tlb_flush_addr = -1;
>      env->tlb_flush_mask = 0;
>      tlb_flush_count++;
> +    /* atomic_mb_set(&cpu->pending_tlb_flush, 0); */
> +}
> +
> +static void tlb_flush_global_async_work(CPUState *cpu, void *opaque)
> +{
> +    tlb_flush_nocheck(cpu, GPOINTER_TO_INT(opaque));
> +}
> +
> +/* NOTE:
> + * If flush_global is true (the usual case), flush all tlb entries.
> + * If flush_global is false, flush (at least) all tlb entries not
> + * marked global.
> + *
> + * Since QEMU doesn't currently implement a global/not-global flag
> + * for tlb entries, at the moment tlb_flush() will also flush all
> + * tlb entries in the flush_global == false case. This is OK because
> + * CPU architectures generally permit an implementation to drop
> + * entries from the TLB at any time, so flushing more entries than
> + * required is only an efficiency issue, not a correctness issue.
> + */
> +void tlb_flush(CPUState *cpu, int flush_global)
> +{
> +    if (cpu->created) {
Why do we check for 'cpu->created' here? And why isn't the same check
done in tlb_flush_page_all()?

> +        async_run_on_cpu(cpu, tlb_flush_global_async_work,
> +                         GINT_TO_POINTER(flush_global));
> +    } else {
> +        tlb_flush_nocheck(cpu, flush_global);
> +    }
>  }
>
>  static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
> @@ -222,6 +242,21 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
>      tb_flush_jmp_cache(cpu, addr);
>  }
>
> +static void tlb_flush_page_async_work(CPUState *cpu, void *opaque)
> +{
> +    tlb_flush_page(cpu, GPOINTER_TO_UINT(opaque));
> +}
> +
> +void tlb_flush_page_all(target_ulong addr)
> +{
> +    CPUState *cpu;
> +
> +    CPU_FOREACH(cpu) {
> +        async_run_on_cpu(cpu, tlb_flush_page_async_work,
> +                         GUINT_TO_POINTER(addr));
> +    }
> +}
> +
>  /* update the TLBs so that writes to code in the virtual page 'addr'
>     can be detected */
>  void tlb_protect_code(ram_addr_t ram_addr)
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 9144ee0..f695577 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -190,6 +190,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
>  void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr);
>  void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
>                   uintptr_t retaddr);
> +void tlb_flush_page_all(target_ulong addr);
>  #else
>  static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
>  {

tlb_flush_by_mmuidx() and tlb_flush_page_by_mmuidx() want to be made
safe as well; see the P.S. below for a rough sketch of what I mean.

Kind regards,
Sergey
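
P.S. A rough, untested sketch of how tlb_flush_by_mmuidx() could be
deferred in the same way: the mmu indexes are packed into a bitmap so
that they fit through async_run_on_cpu()'s single opaque argument.
tlb_flush_by_mmuidx_async_work() and the per-index flush placeholder
are my names, not something from your patch:

static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, void *opaque)
{
    unsigned long mmu_idx_bitmap = GPOINTER_TO_UINT(opaque);
    int mmu_idx;

    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
        if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
            /* do the per-mmu_idx flush that v_tlb_flush_by_mmuidx()
             * performs today */
        }
    }
}

void tlb_flush_by_mmuidx(CPUState *cpu, ...)
{
    va_list argp;
    unsigned long mmu_idx_bitmap = 0;
    int mmu_idx;

    /* Pack the -1-terminated mmu_idx list into a bitmap so it can be
     * passed through the single opaque pointer. */
    va_start(argp, cpu);
    for (;;) {
        mmu_idx = va_arg(argp, int);
        if (mmu_idx < 0) {
            break;
        }
        set_bit(mmu_idx, &mmu_idx_bitmap);
    }
    va_end(argp);

    if (cpu->created) {
        async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
                         GUINT_TO_POINTER(mmu_idx_bitmap));
    } else {
        tlb_flush_by_mmuidx_async_work(cpu,
                                       GUINT_TO_POINTER(mmu_idx_bitmap));
    }
}

tlb_flush_page_by_mmuidx() needs the address as well as the bitmap,
which no longer fits in one pointer, so it would probably have to pass
a small heap-allocated struct that the async worker frees.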