The branch main has been updated by manu:

URL: https://cgit.FreeBSD.org/src/commit/?id=a5a918b7a906eaa88e0833eac70a15989d535b02

commit a5a918b7a906eaa88e0833eac70a15989d535b02
Author:     Corvin Köhne <corv...@beckhoff.com>
AuthorDate: 2022-09-07 07:07:03 +0000
Commit:     Emmanuel Vadot <m...@freebsd.org>
CommitDate: 2022-09-07 07:07:03 +0000

    vmm: permit some IPIs to be handled by userspace
    
    Add VM_EXITCODE_IPI to permit returning unhandled IPIs to userland.
    INIT and Startup IPIs are now returned to userland. For backward
    compatibility, a new capability is added to enable
    VM_EXITCODE_IPI.
    
    MFC after:              2 weeks
    Differential Revision:  https://reviews.freebsd.org/D35623
    Sponsored by:           Beckhoff Automation GmbH & Co. KG
---
 sys/amd64/include/vmm.h        |   8 ++
 sys/amd64/vmm/amd/svm.c        |  10 +++
 sys/amd64/vmm/intel/vmx.c      |   8 ++
 sys/amd64/vmm/io/vlapic.c      | 166 ++++++++++++++++++++++++-----------------
 sys/amd64/vmm/io/vlapic_priv.h |   2 +
 usr.sbin/bhyve/bhyverun.c      |  34 +++++++++
 usr.sbin/bhyve/spinup_ap.c     |   3 +
 7 files changed, 162 insertions(+), 69 deletions(-)

diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index dcf862c34264..37a74f053fb3 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -31,6 +31,7 @@
 #ifndef _VMM_H_
 #define        _VMM_H_
 
+#include <sys/cpuset.h>
 #include <sys/sdt.h>
 #include <x86/segments.h>
 
@@ -483,6 +484,7 @@ enum vm_cap_type {
        VM_CAP_BPT_EXIT,
        VM_CAP_RDPID,
        VM_CAP_RDTSCP,
+       VM_CAP_IPI_EXIT,
        VM_CAP_MAX
 };
 
@@ -630,6 +632,7 @@ enum vm_exitcode {
        VM_EXITCODE_DEBUG,
        VM_EXITCODE_VMINSN,
        VM_EXITCODE_BPT,
+       VM_EXITCODE_IPI,
        VM_EXITCODE_MAX
 };
 
@@ -737,6 +740,11 @@ struct vm_exit {
                struct {
                        enum vm_suspend_how how;
                } suspended;
+               struct {
+                       uint32_t mode;
+                       uint8_t vector;
+                       cpuset_t dmask;
+               } ipi;
                struct vm_task_switch task_switch;
        } u;
 };
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index 35e8d9833d0e..4195cc5bd049 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -2315,6 +2315,7 @@ static int
 svm_setcap(void *arg, int vcpu, int type, int val)
 {
        struct svm_softc *sc;
+       struct vlapic *vlapic;
        int error;
 
        sc = arg;
@@ -2333,6 +2334,10 @@ svm_setcap(void *arg, int vcpu, int type, int val)
                if (val == 0)
                        error = EINVAL;
                break;
+       case VM_CAP_IPI_EXIT:
+               vlapic = vm_lapic(sc->vm, vcpu);
+               vlapic->ipi_exit = val;
+               break;
        default:
                error = ENOENT;
                break;
@@ -2344,6 +2349,7 @@ static int
 svm_getcap(void *arg, int vcpu, int type, int *retval)
 {
        struct svm_softc *sc;
+       struct vlapic *vlapic;
        int error;
 
        sc = arg;
@@ -2361,6 +2367,10 @@ svm_getcap(void *arg, int vcpu, int type, int *retval)
        case VM_CAP_UNRESTRICTED_GUEST:
                *retval = 1;    /* unrestricted guest is always enabled */
                break;
+       case VM_CAP_IPI_EXIT:
+               vlapic = vm_lapic(sc->vm, vcpu);
+               *retval = vlapic->ipi_exit;
+               break;
        default:
                error = ENOENT;
                break;
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 64544a6e7955..857028dcd0f1 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -3504,6 +3504,7 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval)
                        ret = 0;
                break;
        case VM_CAP_BPT_EXIT:
+       case VM_CAP_IPI_EXIT:
                ret = 0;
                break;
        default:
@@ -3521,6 +3522,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
 {
        struct vmx *vmx = arg;
        struct vmcs *vmcs = &vmx->vmcs[vcpu];
+       struct vlapic *vlapic;
        uint32_t baseval;
        uint32_t *pptr;
        int error;
@@ -3599,6 +3601,12 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
                        reg = VMCS_EXCEPTION_BITMAP;
                }
                break;
+       case VM_CAP_IPI_EXIT:
+               retval = 0;
+
+               vlapic = vm_lapic(vmx->vm, vcpu);
+               vlapic->ipi_exit = val;
+               break;
        default:
                break;
        }
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index 9599b4b4e62c..dc9b00d2316e 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
 
 static void vlapic_set_error(struct vlapic *, uint32_t, bool);
 static void vlapic_callout_handler(void *arg);
+static void vlapic_reset(struct vlapic *vlapic);
 
 static __inline uint32_t
 vlapic_get_id(struct vlapic *vlapic)
@@ -957,9 +958,9 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
 {
        int i;
        bool phys;
-       cpuset_t dmask;
+       cpuset_t dmask, ipimask;
        uint64_t icrval;
-       uint32_t dest, vec, mode;
+       uint32_t dest, vec, mode, shorthand;
        struct vlapic *vlapic2;
        struct vm_exit *vmexit;
        struct LAPIC *lapic;
@@ -975,97 +976,122 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
                dest = icrval >> (32 + 24);
        vec = icrval & APIC_VECTOR_MASK;
        mode = icrval & APIC_DELMODE_MASK;
+       phys = (icrval & APIC_DESTMODE_LOG) == 0;
+       shorthand = icrval & APIC_DEST_MASK;
+
+       maxcpus = vm_get_maxcpus(vlapic->vm);
 
-       if (mode == APIC_DELMODE_FIXED && vec < 16) {
-               vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
-               VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
-               return (0);
-       }
 
        VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
 
-       if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
-               switch (icrval & APIC_DEST_MASK) {
-               case APIC_DEST_DESTFLD:
-                       phys = ((icrval & APIC_DESTMODE_LOG) == 0);
-                       vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
-                           x2apic(vlapic));
-                       break;
-               case APIC_DEST_SELF:
-                       CPU_SETOF(vlapic->vcpuid, &dmask);
-                       break;
-               case APIC_DEST_ALLISELF:
-                       dmask = vm_active_cpus(vlapic->vm);
-                       break;
-               case APIC_DEST_ALLESELF:
-                       dmask = vm_active_cpus(vlapic->vm);
-                       CPU_CLR(vlapic->vcpuid, &dmask);
-                       break;
-               default:
-                       CPU_ZERO(&dmask);       /* satisfy gcc */
-                       break;
+       switch (shorthand) {
+       case APIC_DEST_DESTFLD:
+               vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic));
+               break;
+       case APIC_DEST_SELF:
+               CPU_SETOF(vlapic->vcpuid, &dmask);
+               break;
+       case APIC_DEST_ALLISELF:
+               dmask = vm_active_cpus(vlapic->vm);
+               break;
+       case APIC_DEST_ALLESELF:
+               dmask = vm_active_cpus(vlapic->vm);
+               CPU_CLR(vlapic->vcpuid, &dmask);
+               break;
+       default:
+               __assert_unreachable();
+       }
+
+       /*
+        * ipimask is a set of vCPUs needing userland handling of the current
+        * IPI.
+        */
+       CPU_ZERO(&ipimask);
+
+       switch (mode) {
+       case APIC_DELMODE_FIXED:
+               if (vec < 16) {
+                       vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
+                           false);
+                       VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
+                       return (0);
                }
 
                CPU_FOREACH_ISSET(i, &dmask) {
-                       if (mode == APIC_DELMODE_FIXED) {
-                               lapic_intr_edge(vlapic->vm, i, vec);
-                               vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
-                                                   IPIS_SENT, i, 1);
-                               VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
-                                   "to vcpuid %d", vec, i);
-                       } else {
-                               vm_inject_nmi(vlapic->vm, i);
-                               VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
-                                   "to vcpuid %d", i);
-                       }
+                       lapic_intr_edge(vlapic->vm, i, vec);
+                       vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
+                           IPIS_SENT, i, 1);
+                       VLAPIC_CTR2(vlapic,
+                           "vlapic sending ipi %d to vcpuid %d", vec, i);
                }
 
-               return (0);     /* handled completely in the kernel */
-       }
+               break;
+       case APIC_DELMODE_NMI:
+               CPU_FOREACH_ISSET(i, &dmask) {
+                       vm_inject_nmi(vlapic->vm, i);
+                       VLAPIC_CTR1(vlapic,
+                           "vlapic sending ipi nmi to vcpuid %d", i);
+               }
 
-       maxcpus = vm_get_maxcpus(vlapic->vm);
-       if (mode == APIC_DELMODE_INIT) {
+               break;
+       case APIC_DELMODE_INIT:
                if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
-                       return (0);
-
-               if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
-                       vlapic2 = vm_lapic(vlapic->vm, dest);
-
-                       /* move from INIT to waiting-for-SIPI state */
-                       if (vlapic2->boot_state == BS_INIT) {
-                               vlapic2->boot_state = BS_SIPI;
-                       }
+                       break;
 
-                       return (0);
+               CPU_FOREACH_ISSET(i, &dmask) {
+                       vlapic2 = vm_lapic(vlapic->vm, i);
+                       vlapic2->boot_state = BS_SIPI;
+                       CPU_SET(i, &ipimask);
                }
-       }
-
-       if (mode == APIC_DELMODE_STARTUP) {
-               if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
-                       vlapic2 = vm_lapic(vlapic->vm, dest);
 
+               break;
+       case APIC_DELMODE_STARTUP:
+               CPU_FOREACH_ISSET(i, &dmask) {
+                       vlapic2 = vm_lapic(vlapic->vm, i);
                        /*
                         * Ignore SIPIs in any state other than wait-for-SIPI
                         */
                        if (vlapic2->boot_state != BS_SIPI)
-                               return (0);
-
+                               continue;
+                       /*
+                        * TODO:
+                        * This should be triggered from userspace.
+                        */
+                       vlapic_reset(vlapic2);
                        vlapic2->boot_state = BS_RUNNING;
+                       CPU_SET(i, &ipimask);
+               }
 
-                       *retu = true;
-                       vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
-                       vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
-                       vmexit->u.spinup_ap.vcpu = dest;
-                       vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
+               break;
+       default:
+               return (1);
+       }
 
-                       return (0);
+       if (!CPU_EMPTY(&ipimask)) {
+               vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
+               vmexit->exitcode = VM_EXITCODE_IPI;
+               vmexit->u.ipi.mode = mode;
+               vmexit->u.ipi.vector = vec;
+               vmexit->u.ipi.dmask = dmask;
+
+               *retu = true;
+
+               /*
+                * Old bhyve versions don't support the IPI exit. Translate it
+                * into the old style.
+                */
+               if (!vlapic->ipi_exit) {
+                       if (mode == APIC_DELMODE_STARTUP) {
+                               vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
+                               vmexit->u.spinup_ap.vcpu = CPU_FFS(&ipimask) - 1;
+                               vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
+                       } else {
+                               *retu = false;
+                       }
                }
        }
 
-       /*
-        * This will cause a return to userland.
-        */
-       return (1);
+       return (0);
 }
 
 void
@@ -1467,6 +1493,8 @@ vlapic_init(struct vlapic *vlapic)
        if (vlapic->vcpuid == 0)
                vlapic->msr_apicbase |= APICBASE_BSP;
 
+       vlapic->ipi_exit = false;
+
        vlapic_reset(vlapic);
 }
 
diff --git a/sys/amd64/vmm/io/vlapic_priv.h b/sys/amd64/vmm/io/vlapic_priv.h
index fe7965cb65d7..4b3e9009e68c 100644
--- a/sys/amd64/vmm/io/vlapic_priv.h
+++ b/sys/amd64/vmm/io/vlapic_priv.h
@@ -183,6 +183,8 @@ struct vlapic {
         */
        uint32_t        svr_last;
        uint32_t        lvt_last[VLAPIC_MAXLVT_INDEX + 1];
+
+       bool            ipi_exit;
 };
 
 void vlapic_init(struct vlapic *vlapic);
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 550cc9d15477..a1849519996c 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
 #endif
 
 #include <amd64/vmm/intel/vmcs.h>
+#include <x86/apicreg.h>
 
 #include <machine/atomic.h>
 #include <machine/segments.h>
@@ -935,6 +936,35 @@ vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
        return (VMEXIT_CONTINUE);
 }
 
+static int
+vmexit_ipi(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+       int error = -1;
+       int i;
+       switch (vmexit->u.ipi.mode) {
+       case APIC_DELMODE_INIT:
+               CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) {
+                       error = vm_suspend_cpu(ctx, i);
+                       if (error) {
+                               warnx("%s: failed to suspend cpu %d\n",
+                                   __func__, i);
+                               break;
+                       }
+               }
+               break;
+       case APIC_DELMODE_STARTUP:
+               CPU_FOREACH_ISSET (i, &vmexit->u.ipi.dmask) {
+                       spinup_ap(ctx, i, vmexit->u.ipi.vector << PAGE_SHIFT);
+               }
+               error = 0;
+               break;
+       default:
+               break;
+       }
+
+       return (error);
+}
+
 static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
        [VM_EXITCODE_INOUT]  = vmexit_inout,
        [VM_EXITCODE_INOUT_STR]  = vmexit_inout,
@@ -951,6 +981,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
        [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
        [VM_EXITCODE_DEBUG] = vmexit_debug,
        [VM_EXITCODE_BPT] = vmexit_breakpoint,
+       [VM_EXITCODE_IPI] = vmexit_ipi,
 };
 
 static void
@@ -1151,6 +1182,9 @@ spinup_vcpu(struct vmctx *ctx, int vcpu, bool suspend)
        error = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
        assert(error == 0);
 
+       error = vm_set_capability(ctx, vcpu, VM_CAP_IPI_EXIT, 1);
+       assert(error == 0);
+
        fbsdrun_addcpu(ctx, vcpu, rip, suspend);
 }
 
diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c
index 2b7e602f8003..438091e564e7 100644
--- a/usr.sbin/bhyve/spinup_ap.c
+++ b/usr.sbin/bhyve/spinup_ap.c
@@ -98,6 +98,9 @@ spinup_ap(struct vmctx *ctx, int newcpu, uint64_t rip)
        error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
        assert(error == 0);
 
+       error = vm_set_capability(ctx, newcpu, VM_CAP_IPI_EXIT, 1);
+       assert(error == 0);
+
        spinup_ap_realmode(ctx, newcpu, &rip);
 
        vm_resume_cpu(ctx, newcpu);
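
A minimal userland sketch of the new interface follows, modelled on the
vmexit_ipi() handler added to bhyverun.c above. It is illustrative only: the
enable_ipi_exit() and handle_ipi_exit() names are made up, the code assumes it
is compiled inside usr.sbin/bhyve (it reuses the internal spinup_ap() helper),
and only libvmmapi calls already used in the diff appear.

/*
 * Sketch only -- not part of the commit.  Shows how a bhyve-style VMM opts in
 * to VM_EXITCODE_IPI and dispatches it, mirroring vmexit_ipi() above.
 */
#include <sys/param.h>		/* PAGE_SHIFT */
#include <sys/cpuset.h>		/* cpuset_t, CPU_FOREACH_ISSET() */

#include <x86/apicreg.h>	/* APIC_DELMODE_* */
#include <machine/vmm.h>	/* VM_CAP_IPI_EXIT, struct vm_exit */

#include <vmmapi.h>		/* vm_set_capability(), vm_suspend_cpu() */

#include "spinup_ap.h"		/* bhyve-internal: spinup_ap() */

/*
 * Ask the kernel to return INIT/SIPI IPIs as VM_EXITCODE_IPI for this vCPU.
 * A kernel without the capability rejects the request and keeps delivering
 * the legacy VM_EXITCODE_SPINUP_AP exit instead.
 */
static int
enable_ipi_exit(struct vmctx *ctx, int vcpu)
{

	return (vm_set_capability(ctx, vcpu, VM_CAP_IPI_EXIT, 1));
}

/*
 * Dispatch an IPI the kernel handed back to userspace: INIT parks the
 * targeted vCPUs, SIPI starts them at the real-mode page named by the vector.
 */
static int
handle_ipi_exit(struct vmctx *ctx, struct vm_exit *vmexit)
{
	int error, i;

	switch (vmexit->u.ipi.mode) {
	case APIC_DELMODE_INIT:
		CPU_FOREACH_ISSET(i, &vmexit->u.ipi.dmask) {
			error = vm_suspend_cpu(ctx, i);
			if (error != 0)
				return (error);
		}
		return (0);
	case APIC_DELMODE_STARTUP:
		CPU_FOREACH_ISSET(i, &vmexit->u.ipi.dmask)
			spinup_ap(ctx, i, vmexit->u.ipi.vector << PAGE_SHIFT);
		return (0);
	default:
		return (-1);	/* unexpected delivery mode */
	}
}

A VMM that never calls vm_set_capability() keeps the pre-existing behaviour:
fixed and NMI IPIs stay in the kernel, and a startup IPI still arrives as
VM_EXITCODE_SPINUP_AP, which is exactly the fallback the vlapic code above
implements for old bhyve binaries.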
