From: Jan Kiszka <jan.kis...@siemens.com>

This finally allows TCG to benefit from the iothread introduction: Drop
the global mutex while running pure TCG CPU code. Reacquire the lock
when entering MMIO or PIO emulation, or when leaving the TCG loop.
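For illustration only (not part of the patch), the intended lock discipline
can be modelled with a plain pthread mutex standing in for the global mutex:
hold it by default, drop it around pure guest-code execution, and take it
again only for device (MMIO/PIO) accesses and when leaving the execution
loop. The names big_lock, run_guest_insns and mmio_write below are invented
for this sketch; in QEMU the corresponding pieces are qemu_global_mutex,
cpu_exec() and the io_mem_read/io_mem_write paths.

/*
 * Standalone sketch of the lock discipline described above (not part of
 * this patch).  Build with: gcc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* Device emulation stand-in: must only run while holding the big lock. */
static void mmio_write(int val)
{
    pthread_mutex_lock(&big_lock);
    printf("MMIO write %d (under the lock)\n", val);
    pthread_mutex_unlock(&big_lock);
}

/* Pure TCG stand-in: touches no shared device state, safe without the lock. */
static void run_guest_insns(void)
{
}

static void *vcpu_thread(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&big_lock);        /* enter the loop holding the lock */
    for (int i = 0; i < 3; i++) {
        pthread_mutex_unlock(&big_lock);  /* drop it for pure guest code */
        run_guest_insns();
        mmio_write(i);                    /* reacquired only around I/O */
        pthread_mutex_lock(&big_lock);    /* retaken when leaving the TCG loop */
    }
    pthread_mutex_unlock(&big_lock);
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, vcpu_thread, NULL);
    pthread_join(&t, NULL);
    return 0;
}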
We have to revert a few optimizations for the current TCG threading model,
namely kicking the TCG thread in qemu_mutex_lock_iothread and not kicking
it in qemu_cpu_kick. We also need to disable RAM block reordering until we
have a more efficient locking mechanism at hand.

I'm pretty sure some cases are still broken, definitely SMP (we no longer
perform round-robin scheduling "by chance"). Still, a Linux x86 UP guest
and my Musicpal ARM model boot fine here. These numbers demonstrate where
we gain something:

20338 jan       20   0  331m  75m 6904 R   99  0.9   0:50.95 qemu-system-arm
20337 jan       20   0  331m  75m 6904 S   20  0.9   0:26.50 qemu-system-arm

The guest CPU was fully loaded, but the iothread could still run mostly
independently on a second core. Without the patch we don't get beyond

32206 jan       20   0  330m  73m 7036 R   82  0.9   1:06.00 qemu-system-arm
32204 jan       20   0  330m  73m 7036 S   21  0.9   0:17.03 qemu-system-arm

We don't benefit significantly, though, when the guest is not fully loading
a host CPU.

Note that this patch depends on
http://thread.gmane.org/gmane.comp.emulators.qemu/118657

Changes from Fred Konrad:
  * Rebase on the current HEAD.
  * Fix a deadlock in qemu_devices_reset().
---
 cpus.c                    | 19 +++++++------------
 cputlb.c                  |  5 +++++
 exec.c                    | 25 +++++++++++++++++++++++++
 softmmu_template.h        |  6 ++++++
 target-i386/misc_helper.c | 27 ++++++++++++++++++++++++---
 translate-all.c           |  2 ++
 vl.c                      |  6 ++++++
 7 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/cpus.c b/cpus.c
index 91a48f2..f10c94d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1017,7 +1017,7 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
     qemu_tcg_init_cpu_signals();
     qemu_thread_get_self(cpu->thread);
 
-    qemu_mutex_lock(&qemu_global_mutex);
+    qemu_mutex_lock_iothread();
     CPU_FOREACH(cpu) {
         cpu->thread_id = qemu_get_thread_id();
         cpu->created = true;
@@ -1125,17 +1125,7 @@ static bool qemu_in_vcpu_thread(void)
 
 void qemu_mutex_lock_iothread(void)
 {
-    if (!tcg_enabled()) {
-        qemu_mutex_lock(&qemu_global_mutex);
-    } else {
-        iothread_requesting_mutex = true;
-        if (qemu_mutex_trylock(&qemu_global_mutex)) {
-            qemu_cpu_kick_thread(first_cpu);
-            qemu_mutex_lock(&qemu_global_mutex);
-        }
-        iothread_requesting_mutex = false;
-        qemu_cond_broadcast(&qemu_io_proceeded_cond);
-    }
+    qemu_mutex_lock(&qemu_global_mutex);
 }
 
 void qemu_mutex_unlock_iothread(void)
@@ -1356,7 +1346,12 @@ static int tcg_cpu_exec(CPUArchState *env)
         cpu->icount_decr.u16.low = decr;
         cpu->icount_extra = count;
     }
+
+    qemu_mutex_unlock_iothread();
+
     ret = cpu_exec(env);
+
+    qemu_mutex_lock_iothread();
 #ifdef CONFIG_PROFILER
     qemu_time += profile_getclock() - ti;
 #endif
diff --git a/cputlb.c b/cputlb.c
index 3b271d4..4a7e634 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -30,6 +30,9 @@
 #include "exec/ram_addr.h"
 #include "tcg/tcg.h"
 
+void qemu_mutex_lock_iothread(void);
+void qemu_mutex_unlock_iothread(void);
+
 //#define DEBUG_TLB
 //#define DEBUG_TLB_CHECK
 
@@ -125,8 +128,10 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
    can be detected */
 void tlb_protect_code(ram_addr_t ram_addr)
 {
+    qemu_mutex_lock_iothread();
     cpu_physical_memory_reset_dirty(ram_addr, TARGET_PAGE_SIZE,
                                     DIRTY_MEMORY_CODE);
+    qemu_mutex_unlock_iothread();
 }
 
 /* update the TLB so that writes in physical page 'phys_addr' are no longer
diff --git a/exec.c b/exec.c
index 081818e..705d451 100644
--- a/exec.c
+++ b/exec.c
@@ -1786,6 +1786,7 @@ static void check_watchpoint(int offset, int len, int flags)
             }
             wp->hitaddr = vaddr;
             if (!cpu->watchpoint_hit) {
+                qemu_mutex_unlock_iothread();
                 cpu->watchpoint_hit = wp;
                 tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
@@ -2557,6 +2558,7 @@ static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
     mr = address_space_translate(as, addr, &addr1, &l, false);
     if (l < 4 || !memory_access_is_direct(mr, false)) {
         /* I/O case */
+        qemu_mutex_lock_iothread();
         io_mem_read(mr, addr1, &val, 4);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2567,6 +2569,7 @@ static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
             val = bswap32(val);
         }
 #endif
+        qemu_mutex_unlock_iothread();
     } else {
         /* RAM case */
         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
@@ -2616,6 +2619,7 @@ static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
                                  false);
     if (l < 8 || !memory_access_is_direct(mr, false)) {
         /* I/O case */
+        qemu_mutex_lock_iothread();
         io_mem_read(mr, addr1, &val, 8);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2626,6 +2630,7 @@ static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
             val = bswap64(val);
         }
 #endif
+        qemu_mutex_unlock_iothread();
     } else {
         /* RAM case */
         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
@@ -2683,6 +2688,7 @@ static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
                                  false);
     if (l < 2 || !memory_access_is_direct(mr, false)) {
         /* I/O case */
+        qemu_mutex_lock_iothread();
         io_mem_read(mr, addr1, &val, 2);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2693,6 +2699,7 @@ static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
             val = bswap16(val);
         }
 #endif
+        qemu_mutex_unlock_iothread();
     } else {
         /* RAM case */
         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
@@ -2741,7 +2748,9 @@ void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
 
     mr = address_space_translate(as, addr, &addr1, &l, true);
     if (l < 4 || !memory_access_is_direct(mr, true)) {
+        qemu_mutex_lock_iothread();
         io_mem_write(mr, addr1, val, 4);
+        qemu_mutex_unlock_iothread();
     } else {
         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
         ptr = qemu_get_ram_ptr(addr1);
@@ -2749,10 +2758,12 @@ void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
 
         if (unlikely(in_migration)) {
             if (cpu_physical_memory_is_clean(addr1)) {
+                qemu_mutex_lock_iothread();
                 /* invalidate code */
                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                 /* set dirty bit */
                 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
+                qemu_mutex_unlock_iothread();
             }
         }
     }
@@ -2780,7 +2791,9 @@ static inline void stl_phys_internal(AddressSpace *as,
             val = bswap32(val);
         }
 #endif
+        qemu_mutex_lock_iothread();
         io_mem_write(mr, addr1, val, 4);
+        qemu_mutex_unlock_iothread();
     } else {
         /* RAM case */
         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
@@ -2796,7 +2809,9 @@ static inline void stl_phys_internal(AddressSpace *as,
             stl_p(ptr, val);
             break;
         }
+        qemu_mutex_lock_iothread();
         invalidate_and_set_dirty(addr1, 4);
+        qemu_mutex_unlock_iothread();
     }
 }
 
@@ -2843,7 +2858,9 @@ static inline void stw_phys_internal(AddressSpace *as,
             val = bswap16(val);
         }
 #endif
+        qemu_mutex_lock_iothread();
         io_mem_write(mr, addr1, val, 2);
+        qemu_mutex_unlock_iothread();
     } else {
         /* RAM case */
         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
@@ -2859,7 +2876,9 @@ static inline void stw_phys_internal(AddressSpace *as,
             stw_p(ptr, val);
             break;
         }
+        qemu_mutex_lock_iothread();
         invalidate_and_set_dirty(addr1, 2);
+        qemu_mutex_unlock_iothread();
     }
 }
 
@@ -2881,20 +2900,26 @@ void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 /* XXX: optimize */
 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
 {
+    qemu_mutex_lock_iothread();
     val = tswap64(val);
     address_space_rw(as, addr, (void *) &val, 8, 1);
+    qemu_mutex_unlock_iothread();
 }
 
 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
 {
+    qemu_mutex_lock_iothread();
     val = cpu_to_le64(val);
     address_space_rw(as, addr, (void *) &val, 8, 1);
+    qemu_mutex_unlock_iothread();
 }
 
 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
 {
+    qemu_mutex_lock_iothread();
     val = cpu_to_be64(val);
     address_space_rw(as, addr, (void *) &val, 8, 1);
+    qemu_mutex_unlock_iothread();
 }
 
 /* virtual memory access for debug (includes writing to ROM) */
diff --git a/softmmu_template.h b/softmmu_template.h
index 6b4e615..e3c6dc8 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -157,8 +157,12 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
         cpu_io_recompile(cpu, retaddr);
     }
 
+    qemu_mutex_lock_iothread();
+
     cpu->mem_io_vaddr = addr;
     io_mem_read(mr, physaddr, &val, 1 << SHIFT);
+
+    qemu_mutex_unlock_iothread();
     return val;
 }
 #endif
@@ -376,9 +380,11 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
         cpu_io_recompile(cpu, retaddr);
     }
 
+    qemu_mutex_lock_iothread();
     cpu->mem_io_vaddr = addr;
     cpu->mem_io_pc = retaddr;
     io_mem_write(mr, physaddr, val, 1 << SHIFT);
+    qemu_mutex_unlock_iothread();
 }
 
 void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c
index 4aaf1e4..0a953a9 100644
--- a/target-i386/misc_helper.c
+++ b/target-i386/misc_helper.c
@@ -24,32 +24,53 @@
 
 void helper_outb(uint32_t port, uint32_t data)
 {
+    qemu_mutex_lock_iothread();
     cpu_outb(port, data & 0xff);
+    qemu_mutex_unlock_iothread();
 }
 
 target_ulong helper_inb(uint32_t port)
 {
-    return cpu_inb(port);
+    target_ulong ret;
+
+    qemu_mutex_lock_iothread();
+    ret = cpu_inb(port);
+    qemu_mutex_unlock_iothread();
+    return ret;
 }
 
 void helper_outw(uint32_t port, uint32_t data)
 {
+    qemu_mutex_lock_iothread();
     cpu_outw(port, data & 0xffff);
+    qemu_mutex_unlock_iothread();
 }
 
 target_ulong helper_inw(uint32_t port)
 {
-    return cpu_inw(port);
+    target_ulong ret;
+
+    qemu_mutex_lock_iothread();
+    ret = cpu_inw(port);
+    qemu_mutex_unlock_iothread();
+    return ret;
 }
 
 void helper_outl(uint32_t port, uint32_t data)
 {
+    qemu_mutex_lock_iothread();
     cpu_outl(port, data);
+    qemu_mutex_unlock_iothread();
 }
 
 target_ulong helper_inl(uint32_t port)
 {
-    return cpu_inl(port);
+    target_ulong ret;
+
+    qemu_mutex_lock_iothread();
+    ret = cpu_inl(port);
+    qemu_mutex_unlock_iothread();
+    return ret;
 }
 
 void helper_into(CPUX86State *env, int next_eip_addend)
diff --git a/translate-all.c b/translate-all.c
index 68505c0..a986d61 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1240,6 +1240,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
 #endif
 #ifdef TARGET_HAS_PRECISE_SMC
     if (current_tb_modified) {
+        qemu_mutex_unlock_iothread();
         /* we generate a block containing just the instruction
            modifying the memory. It will ensure that it cannot modify
            itself */
@@ -1337,6 +1338,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
     p->first_tb[current_cpu->cpu_index] = NULL;
 #ifdef TARGET_HAS_PRECISE_SMC
     if (current_tb_modified) {
+        qemu_mutex_unlock_iothread();
         /* we generate a block containing just the instruction
            modifying the memory. It will ensure that it cannot modify
            itself */
diff --git a/vl.c b/vl.c
index 7786b2f..160e4a8 100644
--- a/vl.c
+++ b/vl.c
@@ -1608,10 +1608,16 @@ void qemu_devices_reset(void)
 {
     QEMUResetEntry *re, *nre;
 
+    /*
+     * Some device's reset needs to grab the global_mutex. So just release it
+     * here.
+     */
+    qemu_mutex_unlock_iothread();
     /* reset all devices */
     QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) {
         re->func(re->opaque);
     }
+    qemu_mutex_lock_iothread();
 }
 
 void qemu_system_reset(bool report)
-- 
1.9.0