On 16.11.2017 19:07, Paolo Bonzini wrote:
> [ Upstream commit c8401dda2f0a00cd25c0af6a95ed50e478d25de4 ]
> 
> TF is handled a bit differently for syscall and sysret, compared
> to the other instructions: TF is checked after the instruction completes,
> so that the OS can disable #DB at a syscall by adding TF to FMASK.
> When the sysret is executed the #DB is taken "as if" the syscall insn
> just completed.
> 
> KVM emulates syscall so that it can trap 32-bit syscall on Intel processors.
> Fix the behavior, otherwise you could get #DB on a user stack which is not
> nice.  This does not affect Linux guests, as they use an IST or task gate
> for #DB.
> 
> This fixes CVE-2017-7518.
> 
> Cc: sta...@vger.kernel.org
> Reported-by: Andy Lutomirski <l...@kernel.org>
> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
> Signed-off-by: Radim Krčmář <rkrc...@redhat.com>
> 
> Conflicts:
>       arch/x86/kvm/x86.c
> ---
>  arch/x86/include/asm/kvm_emulate.h |  1 +
>  arch/x86/kvm/emulate.c             |  1 +
>  arch/x86/kvm/x86.c                 | 52 ++++++++++++++++----------------------
>  3 files changed, 24 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
> index 19d14ac23ef9..fc3c7e49c8e4 100644
> --- a/arch/x86/include/asm/kvm_emulate.h
> +++ b/arch/x86/include/asm/kvm_emulate.h
> @@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
>  
>       bool perm_ok; /* do not check permissions if true */
>       bool ud;        /* inject an #UD if host doesn't support insn */
> +     bool tf;        /* TF value before instruction (after for syscall/sysret) */
>  
>       bool have_exception;
>       struct x86_exception exception;
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 04b2f3cad7ba..684edebb4a0c 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -2726,6 +2726,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
>               ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
>       }
>  
> +     ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
>       return X86EMUL_CONTINUE;
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8e526c6fd784..3ffd5900da5b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5095,6 +5095,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
>       kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
>  
>       ctxt->eflags = kvm_get_rflags(vcpu);
> +     ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
> +
>       ctxt->eip = kvm_rip_read(vcpu);
>       ctxt->mode = (!is_protmode(vcpu))               ? X86EMUL_MODE_REAL :
>                    (ctxt->eflags & X86_EFLAGS_VM)     ? X86EMUL_MODE_VM86 :
> @@ -5315,37 +5317,26 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
>       return dr6;
>  }
>  
> -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
> +static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
>  {
>       struct kvm_run *kvm_run = vcpu->run;
>  
> -     /*
> -      * rflags is the old, "raw" value of the flags.  The new value has
> -      * not been saved yet.
> -      *
> -      * This is correct even for TF set by the guest, because "the
> -      * processor will not generate this exception after the instruction
> -      * that sets the TF flag".
> -      */
> -     if (unlikely(rflags & X86_EFLAGS_TF)) {
> -             if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
> -                     kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
> -                                               DR6_RTM;
> -                     kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
> -                     kvm_run->debug.arch.exception = DB_VECTOR;
> -                     kvm_run->exit_reason = KVM_EXIT_DEBUG;
> -                     *r = EMULATE_USER_EXIT;
> -             } else {
> -                     vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
> -                     /*
> -                      * "Certain debug exceptions may clear bit 0-3.  The
> -                      * remaining contents of the DR6 register are never
> -                      * cleared by the processor".
> -                      */
> -                     vcpu->arch.dr6 &= ~15;
> -                     vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
> -                     kvm_queue_exception(vcpu, DB_VECTOR);
> -             }
> +     if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
> +             kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
> +             kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
> +             kvm_run->debug.arch.exception = DB_VECTOR;
> +             kvm_run->exit_reason = KVM_EXIT_DEBUG;
> +             *r = EMULATE_USER_EXIT;
> +     } else {
> +             vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
> +             /*
> +              * "Certain debug exceptions may clear bit 0-3.  The
> +              * remaining contents of the DR6 register are never
> +              * cleared by the processor".
> +              */
> +             vcpu->arch.dr6 &= ~15;
> +             vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
> +             kvm_queue_exception(vcpu, DB_VECTOR);
>       }
>  }

This looks good to me.

General question: how do we treat KVM single-stepping when TF is set
concurrently in the guest? IOW, shouldn't the "else" rather be a check for
ctxt->tf? (Or is that handled later on, e.g. in user space?)

But that question is unrelated to this patch, so

Reviewed-by: David Hildenbrand <da...@redhat.com>

>  
> @@ -5500,8 +5491,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
>               toggle_interruptibility(vcpu, ctxt->interruptibility);
>               vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
>               kvm_rip_write(vcpu, ctxt->eip);
> -             if (r == EMULATE_DONE)
> -                     kvm_vcpu_check_singlestep(vcpu, rflags, &r);
> +             if (r == EMULATE_DONE &&
> +                 (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
> +                     kvm_vcpu_do_singlestep(vcpu, &r);
>               if (!ctxt->have_exception ||
>                   exception_type(ctxt->exception.vector) == EXCPT_TRAP)
>                       __kvm_set_rflags(vcpu, ctxt->eflags);
> 


-- 

Thanks,

David / dhildenb

Reply via email to