From: Jan Kiszka <jan.kis...@siemens.com>

This opens the path to get rid of the iothread lock on vmexits in KVM
mode. On x86, the in-kernel irqchip has to be used because we otherwise
need to synchronize APIC and other per-cpu state accesses that could be
changed concurrently.
s390x and ARM should be fine without specific locking as their
pre/post-run callbacks are empty. MIPS and POWER require locking for the
pre-run callback.

Signed-off-by: Jan Kiszka <jan.kis...@siemens.com>
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
 kvm-all.c         | 14 ++++++++++++--
 target-i386/kvm.c | 18 ++++++++++++++++++
 target-mips/kvm.c |  4 ++++
 target-ppc/kvm.c  |  4 ++++
 4 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index 55025cc..8da1deb 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1765,6 +1765,8 @@ int kvm_cpu_exec(CPUState *cpu)
         return EXCP_HLT;
     }
 
+    qemu_mutex_unlock_iothread();
+
     do {
         if (cpu->kvm_vcpu_dirty) {
             kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
@@ -1781,11 +1783,9 @@ int kvm_cpu_exec(CPUState *cpu)
              */
            qemu_cpu_kick_self();
         }
-        qemu_mutex_unlock_iothread();
 
         run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
 
-        qemu_mutex_lock_iothread();
         kvm_arch_post_run(cpu, run);
 
         if (run_ret < 0) {
@@ -1804,19 +1804,23 @@ int kvm_cpu_exec(CPUState *cpu)
         switch (run->exit_reason) {
         case KVM_EXIT_IO:
             DPRINTF("handle_io\n");
+            qemu_mutex_lock_iothread();
             kvm_handle_io(run->io.port,
                           (uint8_t *)run + run->io.data_offset,
                           run->io.direction,
                           run->io.size,
                           run->io.count);
+            qemu_mutex_unlock_iothread();
             ret = 0;
             break;
         case KVM_EXIT_MMIO:
             DPRINTF("handle_mmio\n");
+            qemu_mutex_lock_iothread();
             cpu_physical_memory_rw(run->mmio.phys_addr,
                                    run->mmio.data,
                                    run->mmio.len,
                                    run->mmio.is_write);
+            qemu_mutex_unlock_iothread();
             ret = 0;
             break;
         case KVM_EXIT_IRQ_WINDOW_OPEN:
@@ -1825,7 +1829,9 @@ int kvm_cpu_exec(CPUState *cpu)
             break;
         case KVM_EXIT_SHUTDOWN:
             DPRINTF("shutdown\n");
+            qemu_mutex_lock_iothread();
             qemu_system_reset_request();
+            qemu_mutex_unlock_iothread();
             ret = EXCP_INTERRUPT;
             break;
         case KVM_EXIT_UNKNOWN:
@@ -1854,11 +1860,15 @@ int kvm_cpu_exec(CPUState *cpu)
             break;
         default:
             DPRINTF("kvm_arch_handle_exit\n");
+            qemu_mutex_lock_iothread();
             ret = kvm_arch_handle_exit(cpu, run);
+            qemu_mutex_unlock_iothread();
             break;
         }
     } while (ret == 0);
 
+    qemu_mutex_lock_iothread();
+
     if (ret < 0) {
         cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
         vm_stop(RUN_STATE_INTERNAL_ERROR);
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 41d09e5..94cff8c 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -2191,7 +2191,10 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
 
     /* Inject NMI */
     if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
+        qemu_mutex_lock_iothread();
         cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
+        qemu_mutex_unlock_iothread();
+
         DPRINTF("injected NMI\n");
         ret = kvm_vcpu_ioctl(cpu, KVM_NMI);
         if (ret < 0) {
@@ -2200,6 +2203,10 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
         }
     }
 
+    if (!kvm_irqchip_in_kernel()) {
+        qemu_mutex_lock_iothread();
+    }
+
     /* Force the VCPU out of its inner loop to process any INIT requests
      * or (for userspace APIC, but it is cheap to combine the checks here)
      * pending TPR access reports.
@@ -2243,6 +2250,8 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
 
         DPRINTF("setting tpr\n");
         run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state);
+
+        qemu_mutex_unlock_iothread();
     }
 }
 
@@ -2256,8 +2265,17 @@ void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
     } else {
         env->eflags &= ~IF_MASK;
     }
+
+    /* We need to protect the apic state against concurrent accesses from
+     * different threads in case the userspace irqchip is used. */
+    if (!kvm_irqchip_in_kernel()) {
+        qemu_mutex_lock_iothread();
+    }
     cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8);
     cpu_set_apic_base(x86_cpu->apic_state, run->apic_base);
+    if (!kvm_irqchip_in_kernel()) {
+        qemu_mutex_unlock_iothread();
+    }
 }
 
 int kvm_arch_process_async_events(CPUState *cs)
diff --git a/target-mips/kvm.c b/target-mips/kvm.c
index 4d1f7ea..7f02be7 100644
--- a/target-mips/kvm.c
+++ b/target-mips/kvm.c
@@ -98,6 +98,8 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
     int r;
     struct kvm_mips_interrupt intr;
 
+    qemu_mutex_lock_iothread();
+
     if ((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
             cpu_mips_io_interrupts_pending(cpu)) {
         intr.cpu = -1;
@@ -108,6 +110,8 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
                          __func__, cs->cpu_index, intr.irq);
         }
     }
+
+    qemu_mutex_unlock_iothread();
 }
 
 void kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 12328a4..ce2498a 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1241,6 +1241,8 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
     int r;
     unsigned irq;
 
+    qemu_mutex_lock_iothread();
+
     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
     if (!cap_interrupt_level &&
@@ -1268,6 +1270,8 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
+
+    qemu_mutex_unlock_iothread();
 }
 
 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
-- 
2.3.0
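
For readers who want the locking discipline in isolation: below is a minimal,
self-contained pthread sketch of the pattern this patch establishes in
kvm_cpu_exec. It is not QEMU code; big_lock, fake_kvm_run, handle_mmio_exit
and the vcpu/iteration counts are made-up names for illustration only. The
point is simply that the big lock is not held across the blocking run call
and is re-acquired only around exit handling that touches shared state.

/* Toy illustration of the locking pattern above -- NOT QEMU code.
 * big_lock stands in for the iothread lock, fake_kvm_run for KVM_RUN.
 * Build with: cc -pthread example.c
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
static int shared_device_state;          /* only touched under big_lock */

/* Stand-in for the blocking KVM_RUN ioctl: runs without any lock held. */
static int fake_kvm_run(int cpu)
{
    struct timespec ts = { 0, 1000000 }; /* ~1ms of "guest time" */
    nanosleep(&ts, NULL);
    return cpu & 1;                      /* pretend odd vcpus exit with "MMIO" */
}

/* Exit handling that touches shared state: take the lock only here. */
static void handle_mmio_exit(void)
{
    pthread_mutex_lock(&big_lock);
    shared_device_state++;
    pthread_mutex_unlock(&big_lock);
}

static void *vcpu_thread(void *arg)
{
    int cpu = (int)(intptr_t)arg;
    int i;

    for (i = 0; i < 100; i++) {
        /* big_lock is NOT held across the blocking call... */
        int exit_reason = fake_kvm_run(cpu);

        /* ...and is re-acquired only where shared state is accessed. */
        if (exit_reason) {
            handle_mmio_exit();
        }
    }
    return NULL;
}

int main(void)
{
    pthread_t t[2];
    int i;

    for (i = 0; i < 2; i++) {
        pthread_create(&t[i], NULL, vcpu_thread, (void *)(intptr_t)i);
    }
    for (i = 0; i < 2; i++) {
        pthread_join(t[i], NULL);
    }
    printf("mmio exits handled: %d\n", shared_device_state);
    return 0;
}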