Author: neel
Date: Wed Apr 10 02:12:39 2013
New Revision: 249321
URL: http://svnweb.freebsd.org/changeset/base/249321
Log:
  Improve PCI BAR emulation:
  - Respect the MEMEN and PORTEN bits in the command register
  - Allow the guest to reprogram the address decoded by the BAR
  
  Submitted by: Gopakumar T
  Obtained from:        NetApp

Modified:
  head/usr.sbin/bhyve/consport.c
  head/usr.sbin/bhyve/dbgport.c
  head/usr.sbin/bhyve/inout.c
  head/usr.sbin/bhyve/inout.h
  head/usr.sbin/bhyve/mem.c
  head/usr.sbin/bhyve/mem.h
  head/usr.sbin/bhyve/pci_emul.c

Modified: head/usr.sbin/bhyve/consport.c
==============================================================================
--- head/usr.sbin/bhyve/consport.c      Wed Apr 10 00:35:08 2013        
(r249320)
+++ head/usr.sbin/bhyve/consport.c      Wed Apr 10 02:12:39 2013        
(r249321)
@@ -128,6 +128,7 @@ console_handler(struct vmctx *ctx, int v
 static struct inout_port consport = {
        "bvmcons",
        BVM_CONSOLE_PORT,
+       1,
        IOPORT_F_INOUT,
        console_handler
 };

Modified: head/usr.sbin/bhyve/dbgport.c
==============================================================================
--- head/usr.sbin/bhyve/dbgport.c       Wed Apr 10 00:35:08 2013        
(r249320)
+++ head/usr.sbin/bhyve/dbgport.c       Wed Apr 10 02:12:39 2013        
(r249321)
@@ -105,6 +105,7 @@ again:
 static struct inout_port dbgport = {
        "bvmdbg",
        BVM_DBG_PORT,
+       1,
        IOPORT_F_INOUT,
        dbg_handler
 };

Modified: head/usr.sbin/bhyve/inout.c
==============================================================================
--- head/usr.sbin/bhyve/inout.c Wed Apr 10 00:35:08 2013        (r249320)
+++ head/usr.sbin/bhyve/inout.c Wed Apr 10 02:12:39 2013        (r249321)
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/linker_set.h>
 
 #include <stdio.h>
+#include <string.h>
 #include <assert.h>
 
 #include "inout.h"
@@ -41,6 +42,9 @@ SET_DECLARE(inout_port_set, struct inout
 
 #define        MAX_IOPORTS     (1 << 16)
 
+#define        VERIFY_IOPORT(port, size) \
+       assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS)
+
 static struct {
        const char      *name;
        int             flags;
@@ -69,6 +73,23 @@ default_inout(struct vmctx *ctx, int vcp
         return (0);
 }
 
+static void 
+register_default_iohandler(int start, int size)
+{
+       struct inout_port iop;
+       
+       VERIFY_IOPORT(start, size);
+
+       bzero(&iop, sizeof(iop));
+       iop.name = "default";
+       iop.port = start;
+       iop.size = size;
+       iop.flags = IOPORT_F_INOUT;
+       iop.handler = default_inout;
+
+       register_inout(&iop);
+}
+
 int
 emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
              uint32_t *eax, int strict)
@@ -113,17 +134,11 @@ void
 init_inout(void)
 {
        struct inout_port **iopp, *iop;
-       int i;
 
        /*
         * Set up the default handler for all ports
         */
-       for (i = 0; i < MAX_IOPORTS; i++) {
-               inout_handlers[i].name = "default";
-               inout_handlers[i].flags = IOPORT_F_IN | IOPORT_F_OUT;
-               inout_handlers[i].handler = default_inout;
-               inout_handlers[i].arg = NULL;
-       }
+       register_default_iohandler(0, MAX_IOPORTS);
 
        /*
         * Overwrite with specified handlers
@@ -141,11 +156,28 @@ init_inout(void)
 int
 register_inout(struct inout_port *iop)
 {
-       assert(iop->port < MAX_IOPORTS);
-       inout_handlers[iop->port].name = iop->name;
-       inout_handlers[iop->port].flags = iop->flags;
-       inout_handlers[iop->port].handler = iop->handler;
-       inout_handlers[iop->port].arg = iop->arg;
+       int i;
+
+       VERIFY_IOPORT(iop->port, iop->size);
+       
+       for (i = iop->port; i < iop->port + iop->size; i++) {
+               inout_handlers[i].name = iop->name;
+               inout_handlers[i].flags = iop->flags;
+               inout_handlers[i].handler = iop->handler;
+               inout_handlers[i].arg = iop->arg;
+       }
+
+       return (0);
+}
+
+int
+unregister_inout(struct inout_port *iop)
+{
+
+       VERIFY_IOPORT(iop->port, iop->size);
+       assert(inout_handlers[iop->port].name == iop->name);
+
+       register_default_iohandler(iop->port, iop->size);
 
        return (0);
 }

Modified: head/usr.sbin/bhyve/inout.h
==============================================================================
--- head/usr.sbin/bhyve/inout.h Wed Apr 10 00:35:08 2013        (r249320)
+++ head/usr.sbin/bhyve/inout.h Wed Apr 10 02:12:39 2013        (r249321)
@@ -39,6 +39,7 @@ typedef int (*inout_func_t)(struct vmctx
 struct inout_port {
        const char      *name;
        int             port;
+       int             size;
        int             flags;
        inout_func_t    handler;
        void            *arg;
@@ -51,6 +52,7 @@ struct inout_port {
        static struct inout_port __CONCAT(__inout_port, __LINE__) = {   \
                #name,                                                  \
                (port),                                                 \
+               1,                                                      \
                (flags),                                                \
                (handler),                                              \
                0                                                       \
@@ -61,7 +63,7 @@ void  init_inout(void);
 int    emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes,
                      uint32_t *eax, int strict);
 int    register_inout(struct inout_port *iop);
-
+int    unregister_inout(struct inout_port *iop);
 void   init_bvmcons(void);
 
 #endif /* _INOUT_H_ */

Modified: head/usr.sbin/bhyve/mem.c
==============================================================================
--- head/usr.sbin/bhyve/mem.c   Wed Apr 10 00:35:08 2013        (r249320)
+++ head/usr.sbin/bhyve/mem.c   Wed Apr 10 02:12:39 2013        (r249321)
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
+#include <pthread.h>
 
 #include "mem.h"
 
@@ -71,6 +72,8 @@ RB_HEAD(mmio_rb_tree, mmio_rb_range) mmi
  */
 static struct mmio_rb_range    *mmio_hint[VM_MAXCPU];
 
+static pthread_rwlock_t rwlock;
+
 static int
 mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
 {
@@ -125,10 +128,12 @@ mmio_rb_dump(struct mmio_rb_tree *rbt)
 {
        struct mmio_rb_range *np;
 
+       pthread_rwlock_rdlock(&rwlock);
        RB_FOREACH(np, mmio_rb_tree, rbt) {
                printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
                       np->mr_param.name);
        }
+       pthread_rwlock_unlock(&rwlock);
 }
 #endif
 
@@ -161,7 +166,8 @@ emulate_mem(struct vmctx *ctx, int vcpu,
 {
        struct mmio_rb_range *entry;
        int err;
-
+       
+       pthread_rwlock_rdlock(&rwlock);
        /*
         * First check the per-vCPU cache
         */
@@ -173,10 +179,11 @@ emulate_mem(struct vmctx *ctx, int vcpu,
                entry = NULL;
 
        if (entry == NULL) {
-               if (!mmio_rb_lookup(&mmio_rb_root, paddr, &entry)) {
+               if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
                        /* Update the per-vCPU cache */
                        mmio_hint[vcpu] = entry;                        
                } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
+                       pthread_rwlock_unlock(&rwlock);
                        return (ESRCH);
                }
        }
@@ -184,25 +191,29 @@ emulate_mem(struct vmctx *ctx, int vcpu,
        assert(entry != NULL);
        err = vmm_emulate_instruction(ctx, vcpu, paddr, vie,
                                      mem_read, mem_write, &entry->mr_param);
+       pthread_rwlock_unlock(&rwlock);
+       
        return (err);
 }
 
 static int
 register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
 {
-       struct mmio_rb_range *mrp;
+       struct mmio_rb_range *entry, *mrp;
        int             err;
 
        err = 0;
 
        mrp = malloc(sizeof(struct mmio_rb_range));
-
+       
        if (mrp != NULL) {
                mrp->mr_param = *memp;
                mrp->mr_base = memp->base;
                mrp->mr_end = memp->base + memp->size - 1;
-
-               err = mmio_rb_add(rbt, mrp);
+               pthread_rwlock_wrlock(&rwlock);
+               if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
+                       err = mmio_rb_add(rbt, mrp);
+               pthread_rwlock_unlock(&rwlock);
                if (err)
                        free(mrp);
        } else
@@ -225,10 +236,40 @@ register_mem_fallback(struct mem_range *
        return (register_mem_int(&mmio_rb_fallback, memp));
 }
 
+int 
+unregister_mem(struct mem_range *memp)
+{
+       struct mem_range *mr;
+       struct mmio_rb_range *entry = NULL;
+       int err, i;
+       
+       pthread_rwlock_wrlock(&rwlock);
+       err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
+       if (err == 0) {
+               mr = &entry->mr_param;
+               assert(mr->name == memp->name);
+               assert(mr->base == memp->base && mr->size == memp->size); 
+               RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
+
+               /* flush Per-vCPU cache */      
+               for (i=0; i < VM_MAXCPU; i++) {
+                       if (mmio_hint[i] == entry)
+                               mmio_hint[i] = NULL;
+               }
+       }
+       pthread_rwlock_unlock(&rwlock);
+
+       if (entry)
+               free(entry);
+       
+       return (err);
+}
+
 void
 init_mem(void)
 {
 
        RB_INIT(&mmio_rb_root);
        RB_INIT(&mmio_rb_fallback);
+       pthread_rwlock_init(&rwlock, NULL);
 }

Modified: head/usr.sbin/bhyve/mem.h
==============================================================================
--- head/usr.sbin/bhyve/mem.h   Wed Apr 10 00:35:08 2013        (r249320)
+++ head/usr.sbin/bhyve/mem.h   Wed Apr 10 02:12:39 2013        (r249321)
@@ -54,5 +54,6 @@ int     emulate_mem(struct vmctx *, int 
                    
 int    register_mem(struct mem_range *memp);
 int    register_mem_fallback(struct mem_range *memp);
+int    unregister_mem(struct mem_range *memp);
 
 #endif /* _MEM_H_ */

Modified: head/usr.sbin/bhyve/pci_emul.c
==============================================================================
--- head/usr.sbin/bhyve/pci_emul.c      Wed Apr 10 00:35:08 2013        
(r249320)
+++ head/usr.sbin/bhyve/pci_emul.c      Wed Apr 10 02:12:39 2013        
(r249321)
@@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/linker_set.h>
+#include <sys/errno.h>
 
 #include <ctype.h>
 #include <stdio.h>
@@ -38,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <string.h>
 #include <strings.h>
 #include <assert.h>
+#include <stdbool.h>
 
 #include <machine/vmm.h>
 #include <vmmapi.h>
@@ -353,20 +355,150 @@ pci_emul_alloc_bar(struct pci_devinst *p
        return (pci_emul_alloc_pbar(pdi, idx, 0, type, size));
 }
 
+/*
+ * Register (or unregister) the MMIO or I/O region associated with the BAR
+ * register 'idx' of an emulated pci device.
+ */
+static void
+modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
+{
+       int error;
+       struct inout_port iop;
+       struct mem_range mr;
+
+       switch (pi->pi_bar[idx].type) {
+       case PCIBAR_IO:
+               bzero(&iop, sizeof(struct inout_port));
+               iop.name = pi->pi_name;
+               iop.port = pi->pi_bar[idx].addr;
+               iop.size = pi->pi_bar[idx].size;
+               if (registration) {
+                       iop.flags = IOPORT_F_INOUT;
+                       iop.handler = pci_emul_io_handler;
+                       iop.arg = pi;
+                       error = register_inout(&iop);
+               } else 
+                       error = unregister_inout(&iop);
+               break;
+       case PCIBAR_MEM32:
+       case PCIBAR_MEM64:
+               bzero(&mr, sizeof(struct mem_range));
+               mr.name = pi->pi_name;
+               mr.base = pi->pi_bar[idx].addr;
+               mr.size = pi->pi_bar[idx].size;
+               if (registration) {
+                       mr.flags = MEM_F_RW;
+                       mr.handler = pci_emul_mem_handler;
+                       mr.arg1 = pi;
+                       mr.arg2 = idx;
+                       error = register_mem(&mr);
+               } else
+                       error = unregister_mem(&mr);
+               break;
+       default:
+               error = EINVAL;
+               break;
+       }
+       assert(error == 0);
+}
+
+static void
+unregister_bar(struct pci_devinst *pi, int idx)
+{
+
+       modify_bar_registration(pi, idx, 0);
+}
+
+static void
+register_bar(struct pci_devinst *pi, int idx)
+{
+
+       modify_bar_registration(pi, idx, 1);
+}
+
+/* Are we decoding i/o port accesses for the emulated pci device? */
+static int
+porten(struct pci_devinst *pi)
+{
+       uint16_t cmd;
+
+       cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
+
+       return (cmd & PCIM_CMD_PORTEN);
+}
+
+/* Are we decoding memory accesses for the emulated pci device? */
+static int
+memen(struct pci_devinst *pi)
+{
+       uint16_t cmd;
+
+       cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
+
+       return (cmd & PCIM_CMD_MEMEN);
+}
+
+/*
+ * Update the MMIO or I/O address that is decoded by the BAR register.
+ *
+ * If the pci device has enabled the address space decoding then intercept
+ * the address range decoded by the BAR register.
+ */
+static void
+update_bar_address(struct  pci_devinst *pi, uint64_t addr, int idx, int type)
+{
+       int decode;
+
+       if (pi->pi_bar[idx].type == PCIBAR_IO)
+               decode = porten(pi);
+       else
+               decode = memen(pi);
+
+       if (decode)
+               unregister_bar(pi, idx);
+
+       switch (type) {
+       case PCIBAR_IO:
+       case PCIBAR_MEM32:
+               pi->pi_bar[idx].addr = addr;
+               break;
+       case PCIBAR_MEM64:
+               pi->pi_bar[idx].addr &= ~0xffffffffUL;
+               pi->pi_bar[idx].addr |= addr;
+               break;
+       case PCIBAR_MEMHI64:
+               pi->pi_bar[idx].addr &= 0xffffffff;
+               pi->pi_bar[idx].addr |= addr;
+               break;
+       default:
+               assert(0);
+       }
+
+       if (decode)
+               register_bar(pi, idx);
+}
+
 int
 pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
                    enum pcibar_type type, uint64_t size)
 {
-       int i, error;
+       int error;
        uint64_t *baseptr, limit, addr, mask, lobits, bar;
-       struct inout_port iop;
-       struct mem_range memp;
 
        assert(idx >= 0 && idx <= PCI_BARMAX);
 
        if ((size & (size - 1)) != 0)
                size = 1UL << flsl(size);       /* round up to a power of 2 */
 
+       /* Enforce minimum BAR sizes required by the PCI standard */
+       if (type == PCIBAR_IO) {
+               if (size < 4)
+                       size = 4;
+       } else {
+               if (size < 16)
+                       size = 16;
+       }
+
        switch (type) {
        case PCIBAR_NONE:
                baseptr = NULL;
@@ -443,30 +575,7 @@ pci_emul_alloc_pbar(struct pci_devinst *
                pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
        }
        
-       /* add a handler to intercept accesses to the I/O bar */
-       if (type == PCIBAR_IO) {
-               iop.name = pdi->pi_name;
-               iop.flags = IOPORT_F_INOUT;
-               iop.handler = pci_emul_io_handler;
-               iop.arg = pdi;
-
-               for (i = 0; i < size; i++) {
-                       iop.port = addr + i;
-                       register_inout(&iop);
-               }
-       } else if (type == PCIBAR_MEM32 || type == PCIBAR_MEM64) {
-               /* add memory bar intercept handler */
-               memp.name = pdi->pi_name;
-               memp.flags = MEM_F_RW;
-               memp.base = addr;
-               memp.size = size;
-               memp.handler = pci_emul_mem_handler;
-               memp.arg1 = pdi;
-               memp.arg2 = idx;
-
-               error = register_mem(&memp);
-               assert(error == 0);
-       }
+       register_bar(pdi, idx);
 
        return (0);
 }
@@ -1101,6 +1210,62 @@ pci_emul_cfgaddr(struct vmctx *ctx, int 
 }
 INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_OUT, pci_emul_cfgaddr);
 
+static uint32_t
+bits_changed(uint32_t old, uint32_t new, uint32_t mask)
+{
+
+       return ((old ^ new) & mask);
+}
+
+static void
+pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
+{
+       int i;
+       uint16_t old;
+
+       /*
+        * The command register is at an offset of 4 bytes and thus the
+        * guest could write 1, 2 or 4 bytes starting at this offset.
+        */
+
+       old = pci_get_cfgdata16(pi, PCIR_COMMAND);      /* stash old value */
+       CFGWRITE(pi, PCIR_COMMAND, new, bytes);         /* update config */
+       new = pci_get_cfgdata16(pi, PCIR_COMMAND);      /* get updated value */
+
+       /*
+        * If the MMIO or I/O address space decoding has changed then
+        * register/unregister all BARs that decode that address space.
+        */
+       for (i = 0; i < PCI_BARMAX; i++) {
+               switch (pi->pi_bar[i].type) {
+                       case PCIBAR_NONE:
+                       case PCIBAR_MEMHI64:
+                               break;
+                       case PCIBAR_IO:
+                               /* I/O address space decoding changed? */
+                               if (bits_changed(old, new, PCIM_CMD_PORTEN)) {
+                                       if (porten(pi))
+                                               register_bar(pi, i);
+                                       else
+                                               unregister_bar(pi, i);
+                               }
+                               break;
+                       case PCIBAR_MEM32:
+                       case PCIBAR_MEM64:
+                               /* MMIO address space decoding changed? */
+                               if (bits_changed(old, new, PCIM_CMD_MEMEN)) {
+                                       if (memen(pi))
+                                               register_bar(pi, i);
+                                       else
+                                               unregister_bar(pi, i);
+                               }
+                               break; 
+                       default:
+                               assert(0); 
+               }
+       }
+}      
+
 static int
 pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
                 uint32_t *eax, void *arg)
@@ -1108,7 +1273,7 @@ pci_emul_cfgdata(struct vmctx *ctx, int 
        struct pci_devinst *pi;
        struct pci_devemu *pe;
        int coff, idx, needcfg;
-       uint64_t mask, bar;
+       uint64_t addr, bar, mask;
 
        assert(bytes == 1 || bytes == 2 || bytes == 4);
        
@@ -1175,33 +1340,48 @@ pci_emul_cfgdata(struct vmctx *ctx, int 
                        if (bytes != 4 || (coff & 0x3) != 0)
                                return (0);
                        idx = (coff - PCIR_BAR(0)) / 4;
+                       mask = ~(pi->pi_bar[idx].size - 1);
                        switch (pi->pi_bar[idx].type) {
                        case PCIBAR_NONE:
-                               bar = 0;
+                               pi->pi_bar[idx].addr = bar = 0;
                                break;
                        case PCIBAR_IO:
-                               mask = ~(pi->pi_bar[idx].size - 1);
-                               mask &= PCIM_BAR_IO_BASE;
-                               bar = (*eax & mask) | PCIM_BAR_IO_SPACE;
+                               addr = *eax & mask;
+                               addr &= 0xffff;
+                               bar = addr | PCIM_BAR_IO_SPACE;
+                               /*
+                                * Register the new BAR value for interception
+                                */
+                               if (addr != pi->pi_bar[idx].addr) {
+                                       update_bar_address(pi, addr, idx,
+                                                          PCIBAR_IO);
+                               }
                                break;
                        case PCIBAR_MEM32:
-                               mask = ~(pi->pi_bar[idx].size - 1);
-                               mask &= PCIM_BAR_MEM_BASE;
-                               bar = *eax & mask;
+                               addr = bar = *eax & mask;
                                bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
+                               if (addr != pi->pi_bar[idx].addr) {
+                                       update_bar_address(pi, addr, idx,
+                                                          PCIBAR_MEM32);
+                               }
                                break;
                        case PCIBAR_MEM64:
-                               mask = ~(pi->pi_bar[idx].size - 1);
-                               mask &= PCIM_BAR_MEM_BASE;
-                               bar = *eax & mask;
+                               addr = bar = *eax & mask;
                                bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
                                       PCIM_BAR_MEM_PREFETCH;
+                               if (addr != (uint32_t)pi->pi_bar[idx].addr) {
+                                       update_bar_address(pi, addr, idx,
+                                                          PCIBAR_MEM64);
+                               }
                                break;
                        case PCIBAR_MEMHI64:
                                mask = ~(pi->pi_bar[idx - 1].size - 1);
-                               mask &= PCIM_BAR_MEM_BASE;
-                               bar = ((uint64_t)*eax << 32) & mask;
-                               bar = bar >> 32;
+                               addr = ((uint64_t)*eax << 32) & mask;
+                               bar = addr >> 32;
+                               if (bar != pi->pi_bar[idx - 1].addr >> 32) {
+                                       update_bar_address(pi, addr, idx - 1,
+                                                          PCIBAR_MEMHI64);
+                               }
                                break;
                        default:
                                assert(0);
@@ -1210,6 +1390,8 @@ pci_emul_cfgdata(struct vmctx *ctx, int 
 
                } else if (pci_emul_iscap(pi, coff)) {
                        pci_emul_capwrite(pi, coff, bytes, *eax);
+               } else if (coff == PCIR_COMMAND) {
+                       pci_emul_cmdwrite(pi, *eax, bytes);
                } else {
                        CFGWRITE(pi, coff, *eax, bytes);
                }
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to