Author: neel
Date: Thu May 28 17:37:01 2015
New Revision: 283657
URL: https://svnweb.freebsd.org/changeset/base/283657

Log:
  Fix non-deterministic delays when accessing a vcpu that was in "running" or
  "sleeping" state. This is done by forcing the vcpu to transition to "idle"
  by returning to userspace with an exit code of VM_EXITCODE_REQIDLE.
  
  MFC after:      2 weeks

Modified:
  head/sys/amd64/include/vmm.h
  head/sys/amd64/vmm/amd/svm.c
  head/sys/amd64/vmm/intel/vmx.c
  head/sys/amd64/vmm/vmm.c
  head/sys/amd64/vmm/vmm_stat.c
  head/sys/amd64/vmm/vmm_stat.h
  head/usr.sbin/bhyve/bhyverun.c

Modified: head/sys/amd64/include/vmm.h
==============================================================================
--- head/sys/amd64/include/vmm.h        Thu May 28 17:06:50 2015        (r283656)
+++ head/sys/amd64/include/vmm.h        Thu May 28 17:37:01 2015        (r283657)
@@ -120,13 +120,18 @@ struct vm_object;
 struct vm_guest_paging;
 struct pmap;
 
+struct vm_eventinfo {
+       void    *rptr;          /* rendezvous cookie */
+       int     *sptr;          /* suspend cookie */
+       int     *iptr;          /* reqidle cookie */
+};
+
 typedef int    (*vmm_init_func_t)(int ipinum);
 typedef int    (*vmm_cleanup_func_t)(void);
 typedef void   (*vmm_resume_func_t)(void);
 typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
 typedef int    (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
-                                 struct pmap *pmap, void *rendezvous_cookie,
-                                 void *suspend_cookie);
+                   struct pmap *pmap, struct vm_eventinfo *info);
 typedef void   (*vmi_cleanup_func_t)(void *vmi);
 typedef int    (*vmi_get_register_t)(void *vmi, int vcpu, int num,
                                      uint64_t *retval);
@@ -208,6 +213,7 @@ struct vm_exit *vm_exitinfo(struct vm *v
 void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
 
 #ifdef _SYS__CPUSET_H_
 /*
@@ -232,17 +238,24 @@ cpuset_t vm_suspended_cpus(struct vm *vm
 #endif /* _SYS__CPUSET_H_ */
 
 static __inline int
-vcpu_rendezvous_pending(void *rendezvous_cookie)
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+       return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
 {
 
-       return (*(uintptr_t *)rendezvous_cookie != 0);
+       return (*info->sptr);
 }
 
 static __inline int
-vcpu_suspended(void *suspend_cookie)
+vcpu_reqidle(struct vm_eventinfo *info)
 {
 
-       return (*(int *)suspend_cookie);
+       return (*info->iptr);
 }
 
 /*
@@ -506,6 +519,7 @@ enum vm_exitcode {
        VM_EXITCODE_MONITOR,
        VM_EXITCODE_MWAIT,
        VM_EXITCODE_SVM,
+       VM_EXITCODE_REQIDLE,
        VM_EXITCODE_MAX
 };
 

Modified: head/sys/amd64/vmm/amd/svm.c
==============================================================================
--- head/sys/amd64/vmm/amd/svm.c        Thu May 28 17:06:50 2015        (r283656)
+++ head/sys/amd64/vmm/amd/svm.c        Thu May 28 17:37:01 2015        (r283657)
@@ -1900,7 +1900,7 @@ enable_gintr(void)
  */
 static int
 svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, 
-       void *rend_cookie, void *suspended_cookie)
+       struct vm_eventinfo *evinfo)
 {
        struct svm_regctx *gctx;
        struct svm_softc *svm_sc;
@@ -1975,18 +1975,24 @@ svm_vmrun(void *arg, int vcpu, register_
                 */
                disable_gintr();
 
-               if (vcpu_suspended(suspended_cookie)) {
+               if (vcpu_suspended(evinfo)) {
                        enable_gintr();
                        vm_exit_suspended(vm, vcpu, state->rip);
                        break;
                }
 
-               if (vcpu_rendezvous_pending(rend_cookie)) {
+               if (vcpu_rendezvous_pending(evinfo)) {
                        enable_gintr();
                        vm_exit_rendezvous(vm, vcpu, state->rip);
                        break;
                }
 
+               if (vcpu_reqidle(evinfo)) {
+                       enable_gintr();
+                       vm_exit_reqidle(vm, vcpu, state->rip);
+                       break;
+               }
+
                /* We are asked to give the cpu by scheduler. */
                if (vcpu_should_yield(vm, vcpu)) {
                        enable_gintr();

Modified: head/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- head/sys/amd64/vmm/intel/vmx.c      Thu May 28 17:06:50 2015        (r283656)
+++ head/sys/amd64/vmm/intel/vmx.c      Thu May 28 17:37:01 2015        (r283657)
@@ -2554,7 +2554,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int
 
 static int
 vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
-    void *rendezvous_cookie, void *suspend_cookie)
+    struct vm_eventinfo *evinfo)
 {
        int rc, handled, launched;
        struct vmx *vmx;
@@ -2623,18 +2623,24 @@ vmx_run(void *arg, int vcpu, register_t 
                 * vmx_inject_interrupts() can suspend the vcpu due to a
                 * triple fault.
                 */
-               if (vcpu_suspended(suspend_cookie)) {
+               if (vcpu_suspended(evinfo)) {
                        enable_intr();
                        vm_exit_suspended(vmx->vm, vcpu, rip);
                        break;
                }
 
-               if (vcpu_rendezvous_pending(rendezvous_cookie)) {
+               if (vcpu_rendezvous_pending(evinfo)) {
                        enable_intr();
                        vm_exit_rendezvous(vmx->vm, vcpu, rip);
                        break;
                }
 
+               if (vcpu_reqidle(evinfo)) {
+                       enable_intr();
+                       vm_exit_reqidle(vmx->vm, vcpu, rip);
+                       break;
+               }
+
                if (vcpu_should_yield(vm, vcpu)) {
                        enable_intr();
                        vm_exit_astpending(vmx->vm, vcpu, rip);

Modified: head/sys/amd64/vmm/vmm.c
==============================================================================
--- head/sys/amd64/vmm/vmm.c    Thu May 28 17:06:50 2015        (r283656)
+++ head/sys/amd64/vmm/vmm.c    Thu May 28 17:37:01 2015        (r283657)
@@ -95,6 +95,7 @@ struct vcpu {
        struct mtx      mtx;            /* (o) protects 'state' and 'hostcpu' */
        enum vcpu_state state;          /* (o) vcpu state */
        int             hostcpu;        /* (o) vcpu's host cpu */
+       int             reqidle;        /* (i) request vcpu to idle */
        struct vlapic   *vlapic;        /* (i) APIC device model */
        enum x2apic_state x2apic_state; /* (i) APIC mode */
        uint64_t        exitintinfo;    /* (i) events pending at VM exit */
@@ -164,8 +165,8 @@ static struct vmm_ops *ops;
 #define        VMM_RESUME()    (ops != NULL ? (*ops->resume)() : 0)
 
 #define        VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
-#define        VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
-       (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
+#define        VMRUN(vmi, vcpu, rip, pmap, evinfo) \
+       (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO)
 #define        VMCLEANUP(vmi)  (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
 #define        VMSPACE_ALLOC(min, max) \
        (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
@@ -221,6 +222,28 @@ TUNABLE_INT("hw.vmm.force_iommu", &vmm_f
 SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
     "Force use of I/O MMU even if no passthrough devices were found.");
 
+static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
+
+#ifdef KTR
+static const char *
+vcpu_state2str(enum vcpu_state state)
+{
+
+       switch (state) {
+       case VCPU_IDLE:
+               return ("idle");
+       case VCPU_FROZEN:
+               return ("frozen");
+       case VCPU_RUNNING:
+               return ("running");
+       case VCPU_SLEEPING:
+               return ("sleeping");
+       default:
+               return ("unknown");
+       }
+}
+#endif
+
 static void
 vcpu_cleanup(struct vm *vm, int i, bool destroy)
 {
@@ -255,6 +278,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bo
 
        vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
        vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
+       vcpu->reqidle = 0;
        vcpu->exitintinfo = 0;
        vcpu->nmi_pending = 0;
        vcpu->extint_pending = 0;
@@ -980,11 +1004,13 @@ save_guest_fpustate(struct vcpu *vcpu)
 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 
 static int
-vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
     bool from_idle)
 {
+       struct vcpu *vcpu;
        int error;
 
+       vcpu = &vm->vcpu[vcpuid];
        vcpu_assert_locked(vcpu);
 
        /*
@@ -993,8 +1019,13 @@ vcpu_set_state_locked(struct vcpu *vcpu,
         * ioctl() operating on a vcpu at any point.
         */
        if (from_idle) {
-               while (vcpu->state != VCPU_IDLE)
+               while (vcpu->state != VCPU_IDLE) {
+                       vcpu->reqidle = 1;
+                       vcpu_notify_event_locked(vcpu, false);
+                       VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to "
+                           "idle requested", vcpu_state2str(vcpu->state));
                        msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+               }
        } else {
                KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
                    "vcpu idle state"));
@@ -1031,6 +1062,9 @@ vcpu_set_state_locked(struct vcpu *vcpu,
        if (error)
                return (EBUSY);
 
+       VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s",
+           vcpu_state2str(vcpu->state), vcpu_state2str(newstate));
+
        vcpu->state = newstate;
        if (newstate == VCPU_RUNNING)
                vcpu->hostcpu = curcpu;
@@ -1053,11 +1087,11 @@ vcpu_require_state(struct vm *vm, int vc
 }
 
 static void
-vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate)
 {
        int error;
 
-       if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
+       if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0)
                panic("Error %d setting state to %d", error, newstate);
 }
 
@@ -1145,7 +1179,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid,
                 * vcpu returned from VMRUN() and before it acquired the
                 * vcpu lock above.
                 */
-               if (vm->rendezvous_func != NULL || vm->suspend)
+               if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle)
                        break;
                if (vm_nmi_pending(vm, vcpuid))
                        break;
@@ -1182,13 +1216,13 @@ vm_handle_hlt(struct vm *vm, int vcpuid,
                }
 
                t = ticks;
-               vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+               vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
                /*
                 * XXX msleep_spin() cannot be interrupted by signals so
                 * wake up periodically to check pending signals.
                 */
                msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
-               vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+               vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
                vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
        }
 
@@ -1350,9 +1384,9 @@ vm_handle_suspend(struct vm *vm, int vcp
 
                if (vm->rendezvous_func == NULL) {
                        VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
-                       vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+                       vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
                        msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
-                       vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+                       vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
                } else {
                        VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
                        vcpu_unlock(vcpu);
@@ -1375,6 +1409,19 @@ vm_handle_suspend(struct vm *vm, int vcp
        return (0);
 }
 
+static int
+vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu)
+{
+       struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+       vcpu_lock(vcpu);
+       KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle));
+       vcpu->reqidle = 0;
+       vcpu_unlock(vcpu);
+       *retu = true;
+       return (0);
+}
+
 int
 vm_suspend(struct vm *vm, enum vm_suspend_how how)
 {
@@ -1432,6 +1479,18 @@ vm_exit_rendezvous(struct vm *vm, int vc
 }
 
 void
+vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip)
+{
+       struct vm_exit *vmexit;
+
+       vmexit = vm_exitinfo(vm, vcpuid);
+       vmexit->rip = rip;
+       vmexit->inst_length = 0;
+       vmexit->exitcode = VM_EXITCODE_REQIDLE;
+       vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
+}
+
+void
 vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
 {
        struct vm_exit *vmexit;
@@ -1446,6 +1505,7 @@ vm_exit_astpending(struct vm *vm, int vc
 int
 vm_run(struct vm *vm, struct vm_run *vmrun)
 {
+       struct vm_eventinfo evinfo;
        int error, vcpuid;
        struct vcpu *vcpu;
        struct pcb *pcb;
@@ -1453,7 +1513,6 @@ vm_run(struct vm *vm, struct vm_run *vmr
        struct vm_exit *vme;
        bool retu, intr_disabled;
        pmap_t pmap;
-       void *rptr, *sptr;
 
        vcpuid = vmrun->cpuid;
 
@@ -1466,11 +1525,12 @@ vm_run(struct vm *vm, struct vm_run *vmr
        if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
                return (EINVAL);
 
-       rptr = &vm->rendezvous_func;
-       sptr = &vm->suspend;
        pmap = vmspace_pmap(vm->vmspace);
        vcpu = &vm->vcpu[vcpuid];
        vme = &vcpu->exitinfo;
+       evinfo.rptr = &vm->rendezvous_func;
+       evinfo.sptr = &vm->suspend;
+       evinfo.iptr = &vcpu->reqidle;
 restart:
        critical_enter();
 
@@ -1485,7 +1545,7 @@ restart:
        restore_guest_fpustate(vcpu);
 
        vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
-       error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr);
+       error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo);
        vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
 
        save_guest_fpustate(vcpu);
@@ -1498,6 +1558,9 @@ restart:
                retu = false;
                vcpu->nextrip = vme->rip + vme->inst_length;
                switch (vme->exitcode) {
+               case VM_EXITCODE_REQIDLE:
+                       error = vm_handle_reqidle(vm, vcpuid, &retu);
+                       break;
                case VM_EXITCODE_SUSPENDED:
                        error = vm_handle_suspend(vm, vcpuid, &retu);
                        break;
@@ -1536,6 +1599,8 @@ restart:
        if (error == 0 && retu == false)
                goto restart;
 
+       VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode);
+
        /* copy the exit information */
        bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
        return (error);
@@ -2072,7 +2137,7 @@ vcpu_set_state(struct vm *vm, int vcpuid
        vcpu = &vm->vcpu[vcpuid];
 
        vcpu_lock(vcpu);
-       error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+       error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
        vcpu_unlock(vcpu);
 
        return (error);
@@ -2168,15 +2233,11 @@ vm_set_x2apic_state(struct vm *vm, int v
  * - If the vcpu is running on a different host_cpu then an IPI will be directed
  *   to the host_cpu to cause the vcpu to trap into the hypervisor.
  */
-void
-vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
 {
        int hostcpu;
-       struct vcpu *vcpu;
-
-       vcpu = &vm->vcpu[vcpuid];
 
-       vcpu_lock(vcpu);
        hostcpu = vcpu->hostcpu;
        if (vcpu->state == VCPU_RUNNING) {
                KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
@@ -2201,6 +2262,15 @@ vcpu_notify_event(struct vm *vm, int vcp
                if (vcpu->state == VCPU_SLEEPING)
                        wakeup_one(vcpu);
        }
+}
+
+void
+vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+{
+       struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+       vcpu_lock(vcpu);
+       vcpu_notify_event_locked(vcpu, lapic_intr);
        vcpu_unlock(vcpu);
 }
 

Modified: head/sys/amd64/vmm/vmm_stat.c
==============================================================================
--- head/sys/amd64/vmm/vmm_stat.c       Thu May 28 17:06:50 2015        (r283656)
+++ head/sys/amd64/vmm/vmm_stat.c       Thu May 28 17:37:01 2015        (r283657)
@@ -164,6 +164,7 @@ VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits 
 VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
 VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
 VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
+VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit");
 VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
 VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
 VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");

Modified: head/sys/amd64/vmm/vmm_stat.h
==============================================================================
--- head/sys/amd64/vmm/vmm_stat.h       Thu May 28 17:06:50 2015        (r283656)
+++ head/sys/amd64/vmm/vmm_stat.h       Thu May 28 17:37:01 2015        (r283657)
@@ -157,4 +157,5 @@ VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
 VMM_STAT_DECLARE(VMEXIT_USERSPACE);
 VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
 VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
+VMM_STAT_DECLARE(VMEXIT_REQIDLE);
 #endif

Modified: head/usr.sbin/bhyve/bhyverun.c
==============================================================================
--- head/usr.sbin/bhyve/bhyverun.c      Thu May 28 17:06:50 2015        (r283656)
+++ head/usr.sbin/bhyve/bhyverun.c      Thu May 28 17:37:01 2015        (r283657)
@@ -100,7 +100,7 @@ static struct vm_exit vmexit[VM_MAXCPU];
 
 struct bhyvestats {
         uint64_t        vmexit_bogus;
-        uint64_t        vmexit_bogus_switch;
+       uint64_t        vmexit_reqidle;
         uint64_t        vmexit_hlt;
         uint64_t        vmexit_pause;
         uint64_t        vmexit_mtrap;
@@ -461,6 +461,17 @@ vmexit_bogus(struct vmctx *ctx, struct v
 }
 
 static int
+vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+       assert(vmexit->inst_length == 0);
+
+       stats.vmexit_reqidle++;
+
+       return (VMEXIT_CONTINUE);
+}
+
+static int
 vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
@@ -571,6 +582,7 @@ static vmexit_handler_t handler[VM_EXITC
        [VM_EXITCODE_VMX]    = vmexit_vmx,
        [VM_EXITCODE_SVM]    = vmexit_svm,
        [VM_EXITCODE_BOGUS]  = vmexit_bogus,
+       [VM_EXITCODE_REQIDLE] = vmexit_reqidle,
        [VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
        [VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
        [VM_EXITCODE_MTRAP]  = vmexit_mtrap,
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to