[ Upstream commit c8401dda2f0a00cd25c0af6a95ed50e478d25de4 ]

TF is handled a bit differently for syscall and sysret, compared
to the other instructions: TF is checked after the instruction completes,
so that the OS can disable #DB at a syscall by adding TF to FMASK.
When the sysret is executed the #DB is taken "as if" the syscall insn
just completed.

KVM emulates syscall so that it can trap 32-bit syscall on Intel processors.
Fix the behavior, otherwise you could get #DB on a user stack which is not
nice.  This does not affect Linux guests, as they use an IST or task gate
for #DB.

This fixes CVE-2017-7518.

Cc: sta...@vger.kernel.org
Reported-by: Andy Lutomirski <l...@kernel.org>
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
Signed-off-by: Radim Krčmář <rkrc...@redhat.com>

Conflicts:
        arch/x86/kvm/x86.c
---
 arch/x86/include/asm/kvm_emulate.h |  1 +
 arch/x86/kvm/emulate.c             |  1 +
 arch/x86/kvm/x86.c                 | 52 ++++++++++++++++----------------------
 3 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h 
b/arch/x86/include/asm/kvm_emulate.h
index 19d14ac23ef9..fc3c7e49c8e4 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
 
        bool perm_ok; /* do not check permissions if true */
        bool ud;        /* inject an #UD if host doesn't support insn */
+       bool tf;        /* TF value before instruction (after for 
syscall/sysret) */
 
        bool have_exception;
        struct x86_exception exception;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 04b2f3cad7ba..684edebb4a0c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2726,6 +2726,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
                ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
        }
 
+       ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
        return X86EMUL_CONTINUE;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8e526c6fd784..3ffd5900da5b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5095,6 +5095,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
        ctxt->eflags = kvm_get_rflags(vcpu);
+       ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
        ctxt->eip = kvm_rip_read(vcpu);
        ctxt->mode = (!is_protmode(vcpu))               ? X86EMUL_MODE_REAL :
                     (ctxt->eflags & X86_EFLAGS_VM)     ? X86EMUL_MODE_VM86 :
@@ -5315,37 +5317,26 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 
type, u32 dr7,
        return dr6;
 }
 
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long 
rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
 {
        struct kvm_run *kvm_run = vcpu->run;
 
-       /*
-        * rflags is the old, "raw" value of the flags.  The new value has
-        * not been saved yet.
-        *
-        * This is correct even for TF set by the guest, because "the
-        * processor will not generate this exception after the instruction
-        * that sets the TF flag".
-        */
-       if (unlikely(rflags & X86_EFLAGS_TF)) {
-               if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-                       kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
-                                                 DR6_RTM;
-                       kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
-                       kvm_run->debug.arch.exception = DB_VECTOR;
-                       kvm_run->exit_reason = KVM_EXIT_DEBUG;
-                       *r = EMULATE_USER_EXIT;
-               } else {
-                       vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
-                       /*
-                        * "Certain debug exceptions may clear bit 0-3.  The
-                        * remaining contents of the DR6 register are never
-                        * cleared by the processor".
-                        */
-                       vcpu->arch.dr6 &= ~15;
-                       vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
-                       kvm_queue_exception(vcpu, DB_VECTOR);
-               }
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+               kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+               kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+               kvm_run->debug.arch.exception = DB_VECTOR;
+               kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               *r = EMULATE_USER_EXIT;
+       } else {
+               vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
+               /*
+                * "Certain debug exceptions may clear bit 0-3.  The
+                * remaining contents of the DR6 register are never
+                * cleared by the processor".
+                */
+               vcpu->arch.dr6 &= ~15;
+               vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+               kvm_queue_exception(vcpu, DB_VECTOR);
        }
 }
 
@@ -5500,8 +5491,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                toggle_interruptibility(vcpu, ctxt->interruptibility);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
                kvm_rip_write(vcpu, ctxt->eip);
-               if (r == EMULATE_DONE)
-                       kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+               if (r == EMULATE_DONE &&
+                   (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+                       kvm_vcpu_do_singlestep(vcpu, &r);
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
                        __kvm_set_rflags(vcpu, ctxt->eflags);
-- 
1.8.3.1

Reply via email to