A reinjected exception is already recorded in either the IDT-vectored
event information fields or the EXITINTINFO fields; if the handling of
an exception in L0 causes a vmexit, we don't really need to keep the
reinjected exception in vcpu->arch.exception.

Teach kvm_multiple_exception to recognize this scenario through a
new kvm_x86_ops callback.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +
 arch/x86/kvm/svm.c              | 79 +++++++++++++++++++-------------------
 arch/x86/kvm/vmx.c              | 84 +++++++++++++++++++++--------------------
 arch/x86/kvm/x86.c              |  9 +++++
 4 files changed, 95 insertions(+), 79 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6db0ed9cf59e..643308143bea 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -962,6 +962,8 @@ struct kvm_x86_ops {
                                unsigned char *hypercall_addr);
        void (*set_irq)(struct kvm_vcpu *vcpu);
        void (*set_nmi)(struct kvm_vcpu *vcpu);
+       int (*nested_check_exception)(struct kvm_vcpu *vcpu,
+                                     struct kvm_queued_exception *ex);
        void (*queue_exception)(struct kvm_vcpu *vcpu);
        void (*cancel_injection)(struct kvm_vcpu *vcpu);
        int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7e190b21a30b..32c8d8f62985 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -291,8 +291,6 @@ struct amd_svm_iommu_ir {
 static int nested_svm_exit_handled(struct vcpu_svm *svm);
 static int nested_svm_intercept(struct vcpu_svm *svm);
 static int nested_svm_vmexit(struct vcpu_svm *svm);
-static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
-                                     bool has_error_code, u32 error_code);
 
 enum {
        VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
@@ -650,6 +648,42 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        svm_set_interrupt_shadow(vcpu, 0);
 }
 
+static void nested_svm_queue_exception(struct kvm_vcpu *vcpu,
+                                      struct kvm_queued_exception *ex)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->nr;
+       svm->vmcb->control.exit_code_hi = 0;
+       svm->vmcb->control.exit_info_1 = ex->error_code;
+
+       /*
+        * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
+        * The fix is to add the ancillary datum (CR2 or DR6) to structs
+        * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
+        * written only when inject_pending_event runs (DR6 would written here
+        * too).  This should be conditional on a new capability---if the
+        * capability is disabled, kvm_multiple_exception would write the
+        * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
+        */
+       if (ex->nested_apf)
+               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+       else
+               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+
+       svm->nested.exit_required = true;
+}
+
+static int nested_svm_check_exception(struct kvm_vcpu *vcpu,
+                                     struct kvm_queued_exception *ex)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned int nr = ex->nr;
+
+       return ((svm->nested.intercept_exceptions & (1 << nr)) ||
+               (nr == PF_VECTOR && ex->nested_apf));
+}
+
 static void svm_queue_exception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -662,9 +696,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
         * If we are within a nested VM we'd better #VMEXIT and let the guest
         * handle the exception
         */
-       if (!reinject &&
-           nested_svm_check_exception(svm, nr, has_error_code, error_code))
+       if (!reinject && is_guest_mode(vcpu) &&
+           nested_svm_check_exception(vcpu, &vcpu->arch.exception)) {
+               nested_svm_queue_exception(vcpu, &vcpu->arch.exception);
                return;
+       }
 
        if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
                unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
@@ -2428,40 +2464,6 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
        return 0;
 }
 
-static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
-                                     bool has_error_code, u32 error_code)
-{
-       int vmexit;
-
-       if (!is_guest_mode(&svm->vcpu))
-               return 0;
-
-       vmexit = nested_svm_intercept(svm);
-       if (vmexit != NESTED_EXIT_DONE)
-               return 0;
-
-       svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
-       svm->vmcb->control.exit_code_hi = 0;
-       svm->vmcb->control.exit_info_1 = error_code;
-
-       /*
-        * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
-        * The fix is to add the ancillary datum (CR2 or DR6) to structs
-        * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
-        * written only when inject_pending_event runs (DR6 would written here
-        * too).  This should be conditional on a new capability---if the
-        * capability is disabled, kvm_multiple_exception would write the
-        * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
-        */
-       if (svm->vcpu.arch.exception.nested_apf)
-               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
-       else
-               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-
-       svm->nested.exit_required = true;
-       return vmexit;
-}
-
 /* This function returns true if it is save to enable the irq window */
 static inline bool nested_svm_intr(struct vcpu_svm *svm)
 {
@@ -5448,6 +5450,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
        .patch_hypercall = svm_patch_hypercall,
        .set_irq = svm_set_irq,
        .set_nmi = svm_inject_nmi,
+       .nested_check_exception = nested_svm_check_exception,
        .queue_exception = svm_queue_exception,
        .cancel_injection = svm_cancel_injection,
        .interrupt_allowed = svm_interrupt_allowed,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f8ef38094acc..ddabed8425b3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2438,15 +2438,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        vmx_set_interrupt_shadow(vcpu, 0);
 }
 
-static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
-                                              unsigned long exit_qual)
+static void nested_vmx_queue_exception(struct kvm_vcpu *vcpu,
+                                      struct kvm_queued_exception *ex)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-       unsigned int nr = vcpu->arch.exception.nr;
-       u32 intr_info = nr | INTR_INFO_VALID_MASK;
+       unsigned int nr = ex->nr;
+       u32 intr_info = nr | INTR_INFO_VALID_MASK;
+       unsigned long exit_qual = 0;
 
-       if (vcpu->arch.exception.has_error_code) {
-               vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
+       /*
+        * FIXME: we must not write CR2 or DR6 when L1 intercepts an L2 #PF
+        * or #DB exception.
+        *
+        * The fix is to add the ancillary datum (CR2 or DR6) to structs
+        * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
+        * can be written only when inject_pending_event runs.  This should be
+        * conditional on a new capability---if the capability is disabled,
+        * kvm_multiple_exception would write the ancillary information to
+        * CR2 or DR6, for backwards ABI-compatibility.
+        */
+       switch (nr) {
+       case PF_VECTOR:
+               if (ex->nested_apf)
+                       exit_qual = vcpu->arch.apf.nested_apf_token;
+               else
+                       exit_qual = vcpu->arch.cr2;
+               break;
+       case DB_VECTOR:
+               exit_qual = vcpu->arch.dr6;
+               break;
+       default:
+               exit_qual = 0;
+       }
+
+       if (ex->has_error_code) {
+               vmcs12->vm_exit_intr_error_code = ex->error_code;
                intr_info |= INTR_INFO_DELIVER_CODE_MASK;
        }
 
@@ -2466,43 +2492,16 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu,
+                                     struct kvm_queued_exception *ex)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-       unsigned int nr = vcpu->arch.exception.nr;
+       unsigned int nr = ex->nr;
 
-       if (nr == PF_VECTOR) {
-               if (vcpu->arch.exception.nested_apf) {
-                       nested_vmx_inject_exception_vmexit(vcpu,
-                                                          vcpu->arch.apf.nested_apf_token);
-                       return 1;
-               }
-               /*
-                * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
-                * The fix is to add the ancillary datum (CR2 or DR6) to structs
-                * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
-                * can be written only when inject_pending_event runs.  This should be
-                * conditional on a new capability---if the capability is disabled,
-                * kvm_multiple_exception would write the ancillary information to
-                * CR2 or DR6, for backwards ABI-compatibility.
-                */
-               if (nested_vmx_is_page_fault_vmexit(vmcs12,
-                                                   vcpu->arch.exception.error_code)) {
-                       nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
-                       return 1;
-               }
-       } else {
-               unsigned long exit_qual = 0;
-               if (nr == DB_VECTOR)
-                       exit_qual = vcpu->arch.dr6;
-
-               if (vmcs12->exception_bitmap & (1u << nr)) {
-                       nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
-                       return 1;
-               }
-       }
-
-       return 0;
+       return (nr == PF_VECTOR
+               ? (ex->nested_apf ||
+                  nested_vmx_is_page_fault_vmexit(vmcs12, ex->error_code))
+               : vmcs12->exception_bitmap & (1u << nr));
 }
 
 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
@@ -2515,8 +2514,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
        u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
        if (!reinject && is_guest_mode(vcpu) &&
-           nested_vmx_check_exception(vcpu))
+           nested_vmx_check_exception(vcpu, &vcpu->arch.exception)) {
+               nested_vmx_queue_exception(vcpu, &vcpu->arch.exception);
                return;
+       }
 
        if (has_error_code) {
                vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
@@ -11886,6 +11887,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
        .patch_hypercall = vmx_patch_hypercall,
        .set_irq = vmx_inject_irq,
        .set_nmi = vmx_inject_nmi,
+       .nested_check_exception = nested_vmx_check_exception,
        .queue_exception = vmx_queue_exception,
        .cancel_injection = vmx_cancel_injection,
        .interrupt_allowed = vmx_interrupt_allowed,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 88b91114c5a8..55c709531eb9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -390,6 +390,15 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
        if (!vcpu->arch.exception.pending)
                goto queue;
 
+       /*
+        * If the previous exception was recorded during an L2->L0
+        * exit, we can overwrite it with one that causes an L0->L1
+        * nested vmexit.
+        */
+       if (vcpu->arch.exception.reinject && is_guest_mode(vcpu) &&
+           kvm_x86_ops->nested_check_exception(vcpu, &ex))
+               goto queue;
+
        /* to check exception */
        prev_nr = vcpu->arch.exception.nr;
        if (prev_nr == DF_VECTOR) {
-- 
1.8.3.1

Reply via email to