kvm_skip_emulated_instruction calls both
kvm_x86_ops->skip_emulated_instruction and kvm_vcpu_check_singlestep,
skipping the emulated instruction and generating a trap if necessary.
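
For reference, the combined helper looks roughly like this (a sketch of the
x86.c change below; it returns 1 to keep running the guest and 0 to exit to
userspace):

    int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
    {
            unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
            int r = EMULATE_DONE;

            /* Advance RIP past the instruction we just emulated ... */
            kvm_x86_ops->skip_emulated_instruction(vcpu);
            /* ... and raise a #DB or KVM_EXIT_DEBUG if single-stepping. */
            kvm_vcpu_check_singlestep(vcpu, rflags, &r);

            return r == EMULATE_DONE;
    }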

Replacing skip_emulated_instruction calls with
kvm_skip_emulated_instruction is straightforward, except for:

- ICEBP, which is already inside a trap, so we avoid triggering another trap.
- Instructions that can trigger exits to userspace, such as the IO insns,
  MOVs to CR8, and HALT. If kvm_skip_emulated_instruction does trigger a
  KVM_GUESTDBG_SINGLESTEP exit, and the handling code for
  IN/OUT/MOV CR8/HALT also triggers an exit to userspace, the latter will
  take precedence. The singlestep trap will then be triggered again on the
  next instruction, matching the current behavior (the conversion pattern is
  sketched after this list).
- Task switch instructions, which would require additional handling (e.g.
  for the task switch bit) and are therefore left alone.
- Cases where VMLAUNCH/VMRESUME do not proceed to the next instruction,
  which must not trigger a singlestep trap and therefore keep the plain
  skip_emulated_instruction call.
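
The userspace-exit cases above follow a common pattern: capture the result
of kvm_skip_emulated_instruction first, then let the handler's own decision
to exit to userspace win. Roughly, as in the handle_io hunk below:

    ret = kvm_skip_emulated_instruction(vcpu);

    /*
     * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
     * KVM_EXIT_DEBUG here.
     */
    return kvm_fast_pio_out(vcpu, size, port) && ret;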

Signed-off-by: Kyle Huey <kh...@kylehuey.com>
---
 arch/x86/include/asm/kvm_host.h |   3 +-
 arch/x86/kvm/cpuid.c            |   3 +-
 arch/x86/kvm/svm.c              |  11 +--
 arch/x86/kvm/vmx.c              | 177 ++++++++++++++++------------------------
 arch/x86/kvm/x86.c              |  33 ++++++--
 5 files changed, 103 insertions(+), 124 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 80bad5c..8d15870 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1363,17 +1363,18 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                                     struct kvm_async_pf *work);
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                                 struct kvm_async_pf *work);
 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work);
 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
 extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
-void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
+int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
+int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
 
 int kvm_is_in_guest(void);
 
 int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
 int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 07cc629..dc2685e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -885,12 +885,11 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 
        eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
        ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
        kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
        kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
        kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
        kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
        kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
-       kvm_x86_ops->skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5bdffcd..08a4d3a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3146,18 +3146,17 @@ static int skinit_interception(struct vcpu_svm *svm)
        trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;
 }
 
 static int wbinvd_interception(struct vcpu_svm *svm)
 {
-       kvm_emulate_wbinvd(&svm->vcpu);
-       return 1;
+       return kvm_emulate_wbinvd(&svm->vcpu);
 }
 
 static int xsetbv_interception(struct vcpu_svm *svm)
 {
        u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
        u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
 
        if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
@@ -3270,19 +3269,17 @@ static int emulate_on_interception(struct vcpu_svm *svm)
 static int rdpmc_interception(struct vcpu_svm *svm)
 {
        int err;
 
        if (!static_cpu_has(X86_FEATURE_NRIPS))
                return emulate_on_interception(svm);
 
        err = kvm_rdpmc(&svm->vcpu);
-       kvm_complete_insn_gp(&svm->vcpu, err);
-
-       return 1;
+       return kvm_complete_insn_gp(&svm->vcpu, err);
 }
 
 static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
                                            unsigned long val)
 {
        unsigned long cr0 = svm->vcpu.arch.cr0;
        bool ret = false;
        u64 intercept;
@@ -3369,19 +3366,17 @@ static int cr_interception(struct vcpu_svm *svm)
                        break;
                default:
                        WARN(1, "unhandled read from CR%d", cr);
                        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
                        return 1;
                }
                kvm_register_write(&svm->vcpu, reg, val);
        }
-       kvm_complete_insn_gp(&svm->vcpu, err);
-
-       return 1;
+       return kvm_complete_insn_gp(&svm->vcpu, err);
 }
 
 static int dr_interception(struct vcpu_svm *svm)
 {
        int reg, dr;
        unsigned long val;
 
        if (svm->vcpu.guest_debug == 0) {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f4f6304..16a144d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5551,33 +5551,38 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
 {
        vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
        return 0;
 }
 
 static int handle_io(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification;
-       int size, in, string;
+       int size, in, string, ret;
        unsigned port;
 
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        string = (exit_qualification & 16) != 0;
        in = (exit_qualification & 8) != 0;
 
        ++vcpu->stat.io_exits;
 
        if (string || in)
                return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
        port = exit_qualification >> 16;
        size = (exit_qualification & 7) + 1;
-       skip_emulated_instruction(vcpu);
 
-       return kvm_fast_pio_out(vcpu, size, port);
+       ret = kvm_skip_emulated_instruction(vcpu);
+
+       /*
+        * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+        * KVM_EXIT_DEBUG here.
+        */
+       return kvm_fast_pio_out(vcpu, size, port) && ret;
 }
 
 static void
 vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 {
        /*
         * Patch in the VMCALL instruction:
         */
@@ -5665,80 +5670,79 @@ static void handle_clts(struct kvm_vcpu *vcpu)
 }
 
 static int handle_cr(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification, val;
        int cr;
        int reg;
        int err;
+       int ret;
 
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        cr = exit_qualification & 15;
        reg = (exit_qualification >> 8) & 15;
        switch ((exit_qualification >> 4) & 3) {
        case 0: /* mov to cr */
                val = kvm_register_readl(vcpu, reg);
                trace_kvm_cr_write(cr, val);
                switch (cr) {
                case 0:
                        err = handle_set_cr0(vcpu, val);
-                       kvm_complete_insn_gp(vcpu, err);
-                       return 1;
+                       return kvm_complete_insn_gp(vcpu, err);
                case 3:
                        err = kvm_set_cr3(vcpu, val);
-                       kvm_complete_insn_gp(vcpu, err);
-                       return 1;
+                       return kvm_complete_insn_gp(vcpu, err);
                case 4:
                        err = handle_set_cr4(vcpu, val);
-                       kvm_complete_insn_gp(vcpu, err);
-                       return 1;
+                       return kvm_complete_insn_gp(vcpu, err);
                case 8: {
                                u8 cr8_prev = kvm_get_cr8(vcpu);
                                u8 cr8 = (u8)val;
                                err = kvm_set_cr8(vcpu, cr8);
-                               kvm_complete_insn_gp(vcpu, err);
+                               ret = kvm_complete_insn_gp(vcpu, err);
                                if (lapic_in_kernel(vcpu))
-                                       return 1;
+                                       return ret;
                                if (cr8_prev <= cr8)
-                                       return 1;
+                                       return ret;
+                               /*
+                                * TODO: we might be squashing a
+                                * KVM_GUESTDBG_SINGLESTEP-triggered
+                                * KVM_EXIT_DEBUG here.
+                                */
                                vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
                                return 0;
                        }
                }
                break;
        case 2: /* clts */
                handle_clts(vcpu);
                trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
                vmx_fpu_activate(vcpu);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        case 1: /*mov from cr*/
                switch (cr) {
                case 3:
                        val = kvm_read_cr3(vcpu);
                        kvm_register_write(vcpu, reg, val);
                        trace_kvm_cr_read(cr, val);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                case 8:
                        val = kvm_get_cr8(vcpu);
                        kvm_register_write(vcpu, reg, val);
                        trace_kvm_cr_read(cr, val);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                break;
        case 3: /* lmsw */
                val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
                trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
                kvm_lmsw(vcpu, val);
 
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        default:
                break;
        }
        vcpu->run->exit_reason = 0;
        vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
               (int)(exit_qualification >> 4) & 3, cr);
        return 0;
 }
@@ -5799,18 +5803,17 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 
                if (kvm_get_dr(vcpu, dr, &val))
                        return 1;
                kvm_register_write(vcpu, reg, val);
        } else
                if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
                        return 1;
 
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.dr6;
 }
 
 static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
@@ -5853,18 +5856,17 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
                return 1;
        }
 
        trace_kvm_msr_read(ecx, msr_info.data);
 
        /* FIXME: handling of bits 32:63 of rax, rdx */
        vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
        vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_wrmsr(struct kvm_vcpu *vcpu)
 {
        struct msr_data msr;
        u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
        u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
                | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
@@ -5874,18 +5876,17 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
        msr.host_initiated = false;
        if (kvm_set_msr(vcpu, &msr) != 0) {
                trace_kvm_msr_write_ex(ecx, data);
                kvm_inject_gp(vcpu, 0);
                return 1;
        }
 
        trace_kvm_msr_write(ecx, data);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
 {
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        return 1;
 }
 
@@ -5919,56 +5920,52 @@ static int handle_invd(struct kvm_vcpu *vcpu)
        return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_invlpg(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
        kvm_mmu_invlpg(vcpu, exit_qualification);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_rdpmc(struct kvm_vcpu *vcpu)
 {
        int err;
 
        err = kvm_rdpmc(vcpu);
-       kvm_complete_insn_gp(vcpu, err);
-
-       return 1;
+       return kvm_complete_insn_gp(vcpu, err);
 }
 
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
-       kvm_emulate_wbinvd(vcpu);
-       return 1;
+       return kvm_emulate_wbinvd(vcpu);
 }
 
 static int handle_xsetbv(struct kvm_vcpu *vcpu)
 {
        u64 new_bv = kvm_read_edx_eax(vcpu);
        u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
 
        if (kvm_set_xcr(vcpu, index, new_bv) == 0)
-               skip_emulated_instruction(vcpu);
+               return kvm_skip_emulated_instruction(vcpu);
        return 1;
 }
 
 static int handle_xsaves(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
+       kvm_skip_emulated_instruction(vcpu);
        WARN(1, "this should never happen\n");
        return 1;
 }
 
 static int handle_xrstors(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
+       kvm_skip_emulated_instruction(vcpu);
        WARN(1, "this should never happen\n");
        return 1;
 }
 
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
        if (likely(fasteoi)) {
                unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -5979,18 +5976,17 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
                /*
                 * Sane guest uses MOV to write EOI, with written value
                 * not cared. So make a short-circuit here by avoiding
                 * heavy instruction emulation.
                 */
                if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
                    (offset == APIC_EOI)) {
                        kvm_lapic_set_eoi(vcpu);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
        }
        return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -6129,18 +6125,17 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 {
        int ret;
        gpa_t gpa;
 
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
        if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
                trace_kvm_fast_mmio(gpa);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        ret = handle_mmio_page_fault(vcpu, gpa, true);
        if (likely(ret == RET_MMIO_PF_EMULATE))
                return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
                                              EMULATE_DONE;
 
        if (unlikely(ret == RET_MMIO_PF_INVALID))
@@ -6503,25 +6498,22 @@ static __exit void hardware_unsetup(void)
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
        if (ple_gap)
                grow_ple_window(vcpu);
 
        kvm_vcpu_on_spin(vcpu);
-       skip_emulated_instruction(vcpu);
-
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_nop(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_mwait(struct kvm_vcpu *vcpu)
 {
        printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
        return handle_nop(vcpu);
 }
 
@@ -6818,59 +6810,53 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
                 *
                 * Note - IA32_VMX_BASIC[48] will never be 1
                 * for the nested case;
                 * which replaces physical address width with 32
                 *
                 */
                if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
                        nested_vmx_failInvalid(vcpu);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
 
                page = nested_get_page(vcpu, vmptr);
                if (page == NULL ||
                    *(u32 *)kmap(page) != VMCS12_REVISION) {
                        nested_vmx_failInvalid(vcpu);
                        kunmap(page);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                kunmap(page);
                vmx->nested.vmxon_ptr = vmptr;
                break;
        case EXIT_REASON_VMCLEAR:
                if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
                        nested_vmx_failValid(vcpu,
                                             VMXERR_VMCLEAR_INVALID_ADDRESS);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
 
                if (vmptr == vmx->nested.vmxon_ptr) {
                        nested_vmx_failValid(vcpu,
                                             VMXERR_VMCLEAR_VMXON_POINTER);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                break;
        case EXIT_REASON_VMPTRLD:
                if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
                        nested_vmx_failValid(vcpu,
                                             VMXERR_VMPTRLD_INVALID_ADDRESS);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
 
                if (vmptr == vmx->nested.vmxon_ptr) {
                        nested_vmx_failValid(vcpu,
                                             VMXERR_VMCLEAR_VMXON_POINTER);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                break;
        default:
                return 1; /* shouldn't happen */
        }
 
        if (vmpointer)
                *vmpointer = vmptr;
@@ -6916,18 +6902,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
                return 1;
        }
 
        if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
                return 1;
 
        if (vmx->nested.vmxon) {
                nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
                        != VMXON_NEEDED_FEATURES) {
                kvm_inject_gp(vcpu, 0);
                return 1;
        }
 
@@ -6958,18 +6943,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 
        hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
                     HRTIMER_MODE_REL_PINNED);
        vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
 
        vmx->nested.vmxon = true;
 
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 
 out_shadow_vmcs:
        kfree(vmx->nested.cached_vmcs12);
 
 out_cached_vmcs12:
        free_page((unsigned long)vmx->nested.msr_bitmap);
 
 out_msr_bitmap:
@@ -7079,18 +7063,17 @@ static void free_nested(struct vcpu_vmx *vmx)
 
 /* Emulate the VMXOFF instruction */
 static int handle_vmoff(struct kvm_vcpu *vcpu)
 {
        if (!nested_vmx_check_permission(vcpu))
                return 1;
        free_nested(to_vmx(vcpu));
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 /* Emulate the VMCLEAR instruction */
 static int handle_vmclear(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        gpa_t vmptr;
        struct vmcs12 *vmcs12;
@@ -7120,18 +7103,17 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
        vmcs12 = kmap(page);
        vmcs12->launch_state = 0;
        kunmap(page);
        nested_release_page(page);
 
        nested_free_vmcs02(vmx, vmptr);
 
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
 
 /* Emulate the VMLAUNCH instruction */
 static int handle_vmlaunch(struct kvm_vcpu *vcpu)
 {
        return nested_vmx_run(vcpu, true);
@@ -7335,28 +7317,25 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
        u64 field_value;
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        gva_t gva = 0;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
 
-       if (!nested_vmx_check_vmcs12(vcpu)) {
-               skip_emulated_instruction(vcpu);
-               return 1;
-       }
+       if (!nested_vmx_check_vmcs12(vcpu))
+               return kvm_skip_emulated_instruction(vcpu);
 
        /* Decode instruction info and find the field to read */
        field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
        /* Read the field, zero-extended to a u64 field_value */
        if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
                nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
        /*
         * Now copy part of this value to register or memory, as requested.
         * Note that the number of bits actually copied is 32 or 64 depending
         * on the guest's mode (32 or 64 bit), not on the given field's length.
         */
        if (vmx_instruction_info & (1u << 10)) {
                kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
@@ -7366,18 +7345,17 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
                                vmx_instruction_info, true, &gva))
                        return 1;
                /* _system ok, as nested_vmx_check_permission verified cpl=0 */
                kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
                             &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
        }
 
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 
 static int handle_vmwrite(struct kvm_vcpu *vcpu)
 {
        unsigned long field;
        gva_t gva;
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -7389,20 +7367,18 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
         * bits into the vmcs12 field.
         */
        u64 field_value = 0;
        struct x86_exception e;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
 
-       if (!nested_vmx_check_vmcs12(vcpu)) {
-               skip_emulated_instruction(vcpu);
-               return 1;
-       }
+       if (!nested_vmx_check_vmcs12(vcpu))
+               return kvm_skip_emulated_instruction(vcpu);
 
        if (vmx_instruction_info & (1u << 10))
                field_value = kvm_register_readl(vcpu,
                        (((vmx_instruction_info) >> 3) & 0xf));
        else {
                if (get_vmx_mem_address(vcpu, exit_qualification,
                                vmx_instruction_info, false, &gva))
                        return 1;
@@ -7413,29 +7389,26 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
                }
        }
 
 
        field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
        if (vmcs_field_readonly(field)) {
                nested_vmx_failValid(vcpu,
                        VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        if (vmcs12_write_any(vcpu, field, field_value) < 0) {
                nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 /* Emulate the VMPTRLD instruction */
 static int handle_vmptrld(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        gpa_t vmptr;
 
@@ -7446,27 +7419,25 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
                return 1;
 
        if (vmx->nested.current_vmptr != vmptr) {
                struct vmcs12 *new_vmcs12;
                struct page *page;
                page = nested_get_page(vcpu, vmptr);
                if (page == NULL) {
                        nested_vmx_failInvalid(vcpu);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                new_vmcs12 = kmap(page);
                if (new_vmcs12->revision_id != VMCS12_REVISION) {
                        kunmap(page);
                        nested_release_page_clean(page);
                        nested_vmx_failValid(vcpu,
                                VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
 
                nested_release_vmcs12(vmx);
                vmx->nested.current_vmptr = vmptr;
                vmx->nested.current_vmcs12 = new_vmcs12;
                vmx->nested.current_vmcs12_page = page;
                /*
                 * Load VMCS12 from guest memory since it is not already
@@ -7480,18 +7451,17 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
                                      SECONDARY_EXEC_SHADOW_VMCS);
                        vmcs_write64(VMCS_LINK_POINTER,
                                     __pa(vmx->vmcs01.shadow_vmcs));
                        vmx->nested.sync_shadow_vmcs = true;
                }
        }
 
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 /* Emulate the VMPTRST instruction */
 static int handle_vmptrst(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        gva_t vmcs_gva;
@@ -7506,18 +7476,17 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
        /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */
        if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
                                 (void *)&to_vmx(vcpu)->nested.current_vmptr,
                                 sizeof(u64), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
        nested_vmx_succeed(vcpu);
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 /* Emulate the INVEPT instruction */
 static int handle_invept(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 vmx_instruction_info, types;
        unsigned long type;
@@ -7545,18 +7514,17 @@ static int handle_invept(struct kvm_vcpu *vcpu)
        vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
 
        types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
 
        if (type >= 32 || !(types & (1 << type))) {
                nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        /* According to the Intel VMX instruction reference, the memory
         * operand is read even if it isn't needed (e.g., for type==global)
         */
        if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                        vmx_instruction_info, false, &gva))
                return 1;
@@ -7577,18 +7545,17 @@ static int handle_invept(struct kvm_vcpu *vcpu)
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
                nested_vmx_succeed(vcpu);
                break;
        default:
                BUG_ON(1);
                break;
        }
 
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_invvpid(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 vmx_instruction_info;
        unsigned long type, types;
        gva_t gva;
@@ -7609,18 +7576,17 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
        type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
 
        types = (vmx->nested.nested_vmx_vpid_caps &
                        VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
 
        if (type >= 32 || !(types & (1 << type))) {
                nested_vmx_failValid(vcpu,
                        VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        /* according to the intel vmx instruction reference, the memory
         * operand is read even if it isn't needed (e.g., for type==global)
         */
        if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                        vmx_instruction_info, false, &gva))
                return 1;
@@ -7632,33 +7598,30 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 
        switch (type) {
        case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
        case VMX_VPID_EXTENT_SINGLE_CONTEXT:
        case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
                if (!vpid) {
                        nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-                       skip_emulated_instruction(vcpu);
-                       return 1;
+                       return kvm_skip_emulated_instruction(vcpu);
                }
                break;
        case VMX_VPID_EXTENT_ALL_CONTEXT:
                break;
        default:
                WARN_ON_ONCE(1);
-               skip_emulated_instruction(vcpu);
-               return 1;
+               return kvm_skip_emulated_instruction(vcpu);
        }
 
        __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
        nested_vmx_succeed(vcpu);
 
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_pml_full(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification;
 
        trace_kvm_pml_full(vcpu->vcpu_id);
 
@@ -10189,16 +10152,21 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
         * We're finally done with prerequisite checking, and can start with
         * the nested entry.
         */
 
        vmcs02 = nested_get_current_vmcs02(vmx);
        if (!vmcs02)
                return -ENOMEM;
 
+       /*
+        * After this point, the trap flag no longer triggers a singlestep trap
+        * on the vm entry instructions. Don't call
+        * kvm_skip_emulated_instruction.
+        */
        skip_emulated_instruction(vcpu);
        enter_guest_mode(vcpu);
 
        if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
                vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 
        cpu = get_cpu();
        vmx->loaded_vmcs = vmcs02;
@@ -10233,18 +10201,17 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
         * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
         * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
         * returned as far as L1 is concerned. It will only return (and set
         * the success flag) when L2 exits (see nested_vmx_vmexit()).
         */
        return 1;
 
 out:
-       skip_emulated_instruction(vcpu);
-       return 1;
+       return kvm_skip_emulated_instruction(vcpu);
 }
 
 /*
  * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
  * because L2 may have changed some cr0 bits directly (CRO_GUEST_HOST_MASK).
  * This function returns the new value we should put in vmcs12.guest_cr0.
  * It's not enough to just return the vmcs02 GUEST_CR0. Rather,
  *  1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ec59301..7b38c5e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -420,22 +420,24 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 EXPORT_SYMBOL_GPL(kvm_queue_exception);
 
 void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
        kvm_multiple_exception(vcpu, nr, false, 0, true);
 }
 EXPORT_SYMBOL_GPL(kvm_requeue_exception);
 
-void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
+int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
 {
        if (err)
                kvm_inject_gp(vcpu, 0);
        else
-               kvm_x86_ops->skip_emulated_instruction(vcpu);
+               return kvm_skip_emulated_instruction(vcpu);
+
+       return 1;
 }
 EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
 
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
        ++vcpu->stat.pf_guest;
        vcpu->arch.cr2 = fault->address;
        kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
@@ -4808,18 +4810,18 @@ static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
                cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
        } else
                wbinvd();
        return X86EMUL_CONTINUE;
 }
 
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
 {
-       kvm_x86_ops->skip_emulated_instruction(vcpu);
-       return kvm_emulate_wbinvd_noskip(vcpu);
+       kvm_emulate_wbinvd_noskip(vcpu);
+       return kvm_skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
 
 
 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
        kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
@@ -5425,16 +5427,27 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
                         */
                        vcpu->arch.dr6 &= ~15;
                        vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
                        kvm_queue_exception(vcpu, DB_VECTOR);
                }
        }
 }
 
+int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+       unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+       int r = EMULATE_DONE;
+
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+       return r == EMULATE_DONE;
+}
+EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
+
 static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 {
        if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
            (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
                struct kvm_run *kvm_run = vcpu->run;
                unsigned long eip = kvm_get_linear_rip(vcpu);
                u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
                                           vcpu->arch.guest_debug_dr7,
@@ -6002,18 +6015,22 @@ int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
                vcpu->run->exit_reason = KVM_EXIT_HLT;
                return 0;
        }
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
-       kvm_x86_ops->skip_emulated_instruction(vcpu);
-       return kvm_vcpu_halt(vcpu);
+       int ret = kvm_skip_emulated_instruction(vcpu);
+       /*
+        * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
+        * KVM_EXIT_DEBUG here.
+        */
+       return kvm_vcpu_halt(vcpu) && ret;
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
 /*
  * kvm_pv_kick_cpu_op:  Kick a vcpu.
  *
  * @apicid - apicid of vcpu to be kicked.
  */
@@ -6034,19 +6051,19 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
 {
        vcpu->arch.apicv_active = false;
        kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
 }
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
        unsigned long nr, a0, a1, a2, a3, ret;
-       int op_64_bit, r = 1;
+       int op_64_bit, r;
 
-       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       r = kvm_skip_emulated_instruction(vcpu);
 
        if (kvm_hv_hypercall_enabled(vcpu->kvm))
                return kvm_hv_hypercall(vcpu);
 
        nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
        a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
        a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
        a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
-- 
2.10.2
