And huge thanks to you for committing this.

On Mon, 1 Feb 2016 14:56:11 +0000 (UTC)
Peter Grehan <gre...@freebsd.org> wrote:

> Author: grehan
> Date: Mon Feb  1 14:56:11 2016
> New Revision: 295124
> URL: https://svnweb.freebsd.org/changeset/base/295124
> 
> Log:
>   MFC r284539, r284630, r284688, r284877, r285217, r285218,
>       r286837, r286838, r288470, r288522, r288524, r288826,
>       r289001
>   
>   Pull in bhyve bug fixes and changes to allow UEFI booting.
>   This provides Windows support.
>   
>   Tested on Intel and AMD with:
>     - Arch Linux i386+amd64 (kernel 4.3.3)
>     - Ubuntu 15.10 server 64-bit
>     - FreeBSD-CURRENT/amd64 20160127 snap
>     - FreeBSD 10.2 i386+amd64
>     - OpenBSD 5.8 i386+amd64
>     - SmartOS latest
>     - Windows 10 build 1511
>   
>   Huge thanks to Yamagi Burmeister who submitted the patch
>   and did the majority of the testing.
>   
>   r284539 - bootrom mem allocation support
>   r284630 - Add SO_REUSEADDR when starting debug port
>   r284688 - Fix a regression in "movs" emulation
>   r284877 - verify_gla() non-zero segment base fix
>   r285217 - Always assert DCD and DSR in the uart
>   r285218 - devmem nodes moved to /dev/vmm.io/
>   r286837 - Add define for SATA Check-Power-Mode
>   r286838 - Add simple (no-op) SATA cmd emulations
>   r288470 - Increase virtio-blk indirect descs
>   r288522 - Firmware guest query interface
>   r288524 - Fix post-test typo
>   r288826 - Clean up SATA unimplemented cmd msg
>   r289001 - Add -l option to specify userboot path
>   
>   Submitted by:       Yamagi Burmeister
>   Approved by:        re (kib)
> 
> Added:
>   stable/10/usr.sbin/bhyve/bootrom.c
>      - copied unchanged from r284539, head/usr.sbin/bhyve/bootrom.c
>   stable/10/usr.sbin/bhyve/bootrom.h
>      - copied unchanged from r284539, head/usr.sbin/bhyve/bootrom.h
>   stable/10/usr.sbin/bhyve/fwctl.c
>      - copied, changed from r288522, head/usr.sbin/bhyve/fwctl.c
>   stable/10/usr.sbin/bhyve/fwctl.h
>      - copied unchanged from r288522, head/usr.sbin/bhyve/fwctl.h
> Modified:
>   stable/10/lib/libvmmapi/vmmapi.c
>   stable/10/lib/libvmmapi/vmmapi.h
>   stable/10/share/examples/bhyve/vmrun.sh
>   stable/10/sys/amd64/include/vmm.h
>   stable/10/sys/amd64/include/vmm_dev.h
>   stable/10/sys/amd64/vmm/amd/svm.c
>   stable/10/sys/amd64/vmm/intel/vmx.c
>   stable/10/sys/amd64/vmm/io/ppt.c
>   stable/10/sys/amd64/vmm/vmm.c
>   stable/10/sys/amd64/vmm/vmm_dev.c
>   stable/10/sys/amd64/vmm/vmm_instruction_emul.c
>   stable/10/sys/amd64/vmm/vmm_mem.c
>   stable/10/sys/amd64/vmm/vmm_mem.h
>   stable/10/sys/sys/ata.h
>   stable/10/usr.sbin/bhyve/Makefile
>   stable/10/usr.sbin/bhyve/bhyve.8
>   stable/10/usr.sbin/bhyve/bhyverun.c
>   stable/10/usr.sbin/bhyve/dbgport.c
>   stable/10/usr.sbin/bhyve/pci_ahci.c
>   stable/10/usr.sbin/bhyve/pci_lpc.c
>   stable/10/usr.sbin/bhyve/pci_lpc.h
>   stable/10/usr.sbin/bhyve/pci_passthru.c
>   stable/10/usr.sbin/bhyve/pci_virtio_net.c
>   stable/10/usr.sbin/bhyve/uart_emul.c
>   stable/10/usr.sbin/bhyvectl/bhyvectl.c
>   stable/10/usr.sbin/bhyveload/bhyveload.8
>   stable/10/usr.sbin/bhyveload/bhyveload.c
> Directory Properties:
>   stable/10/   (props changed)
> 
> Modified: stable/10/lib/libvmmapi/vmmapi.c
> ==============================================================================
> --- stable/10/lib/libvmmapi/vmmapi.c  Mon Feb  1 14:28:58 2016        (r295123)
> +++ stable/10/lib/libvmmapi/vmmapi.c  Mon Feb  1 14:56:11 2016        (r295124)
> @@ -58,15 +58,23 @@ __FBSDID("$FreeBSD$");
>  #define      MB      (1024 * 1024UL)
>  #define      GB      (1024 * 1024 * 1024UL)
>  
> +/*
> + * Size of the guard region before and after the virtual address space
> + * mapping the guest physical memory. This must be a multiple of the
> + * superpage size for performance reasons.
> + */
> +#define      VM_MMAP_GUARD_SIZE      (4 * MB)
> +
> +#define      PROT_RW         (PROT_READ | PROT_WRITE)
> +#define      PROT_ALL        (PROT_READ | PROT_WRITE | PROT_EXEC)
> +
>  struct vmctx {
>       int     fd;
>       uint32_t lowmem_limit;
> -     enum vm_mmap_style vms;
>       int     memflags;
>       size_t  lowmem;
> -     char    *lowmem_addr;
>       size_t  highmem;
> -     char    *highmem_addr;
> +     char    *baseaddr;
>       char    *name;
>  };
>  
> @@ -157,22 +165,6 @@ vm_parse_memsize(const char *optarg, siz
>       return (error);
>  }
>  
> -int
> -vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
> -               int *wired)
> -{
> -     int error;
> -     struct vm_memory_segment seg;
> -
> -     bzero(&seg, sizeof(seg));
> -     seg.gpa = gpa;
> -     error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
> -     *ret_len = seg.len;
> -     if (wired != NULL)
> -             *wired = seg.wired;
> -     return (error);
> -}
> -
>  uint32_t
>  vm_get_lowmem_limit(struct vmctx *ctx)
>  {
> @@ -194,39 +186,184 @@ vm_set_memflags(struct vmctx *ctx, int f
>       ctx->memflags = flags;
>  }
>  
> +int
> +vm_get_memflags(struct vmctx *ctx)
> +{
> +
> +     return (ctx->memflags);
> +}
> +
> +/*
> + * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
> + */
> +int
> +vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
> +    size_t len, int prot)
> +{
> +     struct vm_memmap memmap;
> +     int error, flags;
> +
> +     memmap.gpa = gpa;
> +     memmap.segid = segid;
> +     memmap.segoff = off;
> +     memmap.len = len;
> +     memmap.prot = prot;
> +     memmap.flags = 0;
> +
> +     if (ctx->memflags & VM_MEM_F_WIRED)
> +             memmap.flags |= VM_MEMMAP_F_WIRED;
> +
> +     /*
> +      * If this mapping already exists then don't create it again. This
> +      * is the common case for SYSMEM mappings created by bhyveload(8).
> +      */
> +     error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
> +     if (error == 0 && gpa == memmap.gpa) {
> +             if (segid != memmap.segid || off != memmap.segoff ||
> +                 prot != memmap.prot || flags != memmap.flags) {
> +                     errno = EEXIST;
> +                     return (-1);
> +             } else {
> +                     return (0);
> +             }
> +     }
> +
> +     error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
> +     return (error);
> +}
> +
> +int
> +vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
> +    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
> +{
> +     struct vm_memmap memmap;
> +     int error;
> +
> +     bzero(&memmap, sizeof(struct vm_memmap));
> +     memmap.gpa = *gpa;
> +     error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
> +     if (error == 0) {
> +             *gpa = memmap.gpa;
> +             *segid = memmap.segid;
> +             *segoff = memmap.segoff;
> +             *len = memmap.len;
> +             *prot = memmap.prot;
> +             *flags = memmap.flags;
> +     }
> +     return (error);
> +}
> +
> +/*
> + * Return 0 if the segments are identical and non-zero otherwise.
> + *
> + * This is slightly complicated by the fact that only device memory segments
> + * are named.
> + */
>  static int
> -setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr)
> +cmpseg(size_t len, const char *str, size_t len2, const char *str2)
>  {
> -     int error, mmap_flags;
> -     struct vm_memory_segment seg;
> +
> +     if (len == len2) {
> +             if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
> +                     return (0);
> +     }
> +     return (-1);
> +}
> +
> +static int
> +vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
> +{
> +     struct vm_memseg memseg;
> +     size_t n;
> +     int error;
>  
>       /*
> -      * Create and optionally map 'len' bytes of memory at guest
> -      * physical address 'gpa'
> +      * If the memory segment has already been created then just return.
> +      * This is the usual case for the SYSMEM segment created by userspace
> +      * loaders like bhyveload(8).
>        */
> -     bzero(&seg, sizeof(seg));
> -     seg.gpa = gpa;
> -     seg.len = len;
> -     error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
> -     if (error == 0 && addr != NULL) {
> -             mmap_flags = MAP_SHARED;
> -             if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
> -                     mmap_flags |= MAP_NOCORE;
> -             *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags,
> -                 ctx->fd, gpa);
> +     error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
> +         sizeof(memseg.name));
> +     if (error)
> +             return (error);
> +
> +     if (memseg.len != 0) {
> +             if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
> +                     errno = EINVAL;
> +                     return (-1);
> +             } else {
> +                     return (0);
> +             }
> +     }
> +
> +     bzero(&memseg, sizeof(struct vm_memseg));
> +     memseg.segid = segid;
> +     memseg.len = len;
> +     if (name != NULL) {
> +             n = strlcpy(memseg.name, name, sizeof(memseg.name));
> +             if (n >= sizeof(memseg.name)) {
> +                     errno = ENAMETOOLONG;
> +                     return (-1);
> +             }
> +     }
> +
> +     error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
> +     return (error);
> +}
> +
> +int
> +vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
> +    size_t bufsize)
> +{
> +     struct vm_memseg memseg;
> +     size_t n;
> +     int error;
> +
> +     memseg.segid = segid;
> +     error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
> +     if (error == 0) {
> +             *lenp = memseg.len;
> +             n = strlcpy(namebuf, memseg.name, bufsize);
> +             if (n >= bufsize) {
> +                     errno = ENAMETOOLONG;
> +                     error = -1;
> +             }
>       }
>       return (error);
>  }
>  
> +static int
> +setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
> +{
> +     char *ptr;
> +     int error, flags;
> +
> +     /* Map 'len' bytes starting at 'gpa' in the guest address space */
> +     error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
> +     if (error)
> +             return (error);
> +
> +     flags = MAP_SHARED | MAP_FIXED;
> +     if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
> +             flags |= MAP_NOCORE;
> +
> +     /* mmap into the process address space on the host */
> +     ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
> +     if (ptr == MAP_FAILED)
> +             return (-1);
> +
> +     return (0);
> +}
> +
>  int
>  vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
>  {
> -     char **addr;
> -     int error;
> +     size_t objsize, len;
> +     vm_paddr_t gpa;
> +     char *baseaddr, *ptr;
> +     int error, flags;
>  
> -     /* XXX VM_MMAP_SPARSE not implemented yet */
> -     assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL);
> -     ctx->vms = vms;
> +     assert(vms == VM_MMAP_ALL);
>  
>       /*
>        * If 'memsize' cannot fit entirely in the 'lowmem' segment then
> @@ -234,43 +371,69 @@ vm_setup_memory(struct vmctx *ctx, size_
>        */
>       if (memsize > ctx->lowmem_limit) {
>               ctx->lowmem = ctx->lowmem_limit;
> -             ctx->highmem = memsize - ctx->lowmem;
> +             ctx->highmem = memsize - ctx->lowmem_limit;
> +             objsize = 4*GB + ctx->highmem;
>       } else {
>               ctx->lowmem = memsize;
>               ctx->highmem = 0;
> +             objsize = ctx->lowmem;
>       }
>  
> -     if (ctx->lowmem > 0) {
> -             addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL;
> -             error = setup_memory_segment(ctx, 0, ctx->lowmem, addr);
> +     error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
> +     if (error)
> +             return (error);
> +
> +     /*
> +      * Stake out a contiguous region covering the guest physical memory
> +      * and the adjoining guard regions.
> +      */
> +     len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
> +     flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
> +     ptr = mmap(NULL, len, PROT_NONE, flags, -1, 0);
> +     if (ptr == MAP_FAILED)
> +             return (-1);
> +
> +     baseaddr = ptr + VM_MMAP_GUARD_SIZE;
> +     if (ctx->highmem > 0) {
> +             gpa = 4*GB;
> +             len = ctx->highmem;
> +             error = setup_memory_segment(ctx, gpa, len, baseaddr);
>               if (error)
>                       return (error);
>       }
>  
> -     if (ctx->highmem > 0) {
> -             addr = (vms == VM_MMAP_ALL) ? &ctx->highmem_addr : NULL;
> -             error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr);
> +     if (ctx->lowmem > 0) {
> +             gpa = 0;
> +             len = ctx->lowmem;
> +             error = setup_memory_segment(ctx, gpa, len, baseaddr);
>               if (error)
>                       return (error);
>       }
>  
> +     ctx->baseaddr = baseaddr;
> +
>       return (0);
>  }
>  
> +/*
> + * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
> + * the lowmem or highmem regions.
> + *
> + * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region.
> + * The instruction emulation code depends on this behavior.
> + */
>  void *
>  vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
>  {
>  
> -     /* XXX VM_MMAP_SPARSE not implemented yet */
> -     assert(ctx->vms == VM_MMAP_ALL);
> -
> -     if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem)
> -             return ((void *)(ctx->lowmem_addr + gaddr));
> +     if (ctx->lowmem > 0) {
> +             if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem)
> +                     return (ctx->baseaddr + gaddr);
> +     }
>  
> -     if (gaddr >= 4*GB) {
> -             gaddr -= 4*GB;
> -             if (gaddr < ctx->highmem && gaddr + len <= ctx->highmem)
> -                     return ((void *)(ctx->highmem_addr + gaddr));
> +     if (ctx->highmem > 0) {
> +             if (gaddr >= 4*GB && gaddr + len <= 4*GB + ctx->highmem)
> +                     return (ctx->baseaddr + gaddr);
>       }
>  
>       return (NULL);
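
(An aside in case it helps others following the new layout: my reading of
vm_setup_memory() and vm_map_gpa() above is that the host VA region
backing a guest with more than 4 GB of memory now ends up as

    guard     | lowmem     | PCI hole | highmem           | guard
    PROT_NONE | [0,lowmem) | unmapped | [4GB,4GB+highmem) | PROT_NONE
    ^ptr        ^baseaddr

so a stray access into the PCI hole faults instead of silently hitting
guest RAM, and vm_map_gpa() reduces to a range check plus
'baseaddr + gaddr'.)
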
> @@ -290,6 +453,56 @@ vm_get_highmem_size(struct vmctx *ctx)
>       return (ctx->highmem);
>  }
>  
> +void *
> +vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
> +{
> +     char pathname[MAXPATHLEN];
> +     size_t len2;
> +     char *base, *ptr;
> +     int fd, error, flags;
> +
> +     fd = -1;
> +     ptr = MAP_FAILED;
> +     if (name == NULL || strlen(name) == 0) {
> +             errno = EINVAL;
> +             goto done;
> +     }
> +
> +     error = vm_alloc_memseg(ctx, segid, len, name);
> +     if (error)
> +             goto done;
> +
> +     strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
> +     strlcat(pathname, ctx->name, sizeof(pathname));
> +     strlcat(pathname, ".", sizeof(pathname));
> +     strlcat(pathname, name, sizeof(pathname));
> +
> +     fd = open(pathname, O_RDWR);
> +     if (fd < 0)
> +             goto done;
> +
> +     /*
> +      * Stake out a contiguous region covering the device memory and the
> +      * adjoining guard regions.
> +      */
> +     len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
> +     flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
> +     base = mmap(NULL, len2, PROT_NONE, flags, -1, 0);
> +     if (base == MAP_FAILED)
> +             goto done;
> +
> +     flags = MAP_SHARED | MAP_FIXED;
> +     if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
> +             flags |= MAP_NOCORE;
> +
> +     /* mmap the devmem region in the host address space */
> +     ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
> +done:
> +     if (fd >= 0)
> +             close(fd);
> +     return (ptr);
> +}
> +
>  int
>  vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
>           uint64_t base, uint32_t limit, uint32_t access)
> 
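
In case it helps anyone experimenting with the new interface: here is a
minimal, untested sketch that walks a VM's guest address space with the
vm_mmap_getnext() iterator implemented above. The helper name is mine and
error handling is omitted:

#include <sys/param.h>
#include <machine/vmm.h>

#include <stdint.h>
#include <stdio.h>

#include <vmmapi.h>

static void
dump_memmaps(struct vmctx *ctx)
{
        vm_paddr_t gpa;
        vm_ooffset_t segoff;
        size_t len;
        int segid, prot, flags;

        gpa = 0;
        /* Each call finds the first mapping at an address >= gpa. */
        while (vm_mmap_getnext(ctx, &gpa, &segid, &segoff, &len,
            &prot, &flags) == 0) {
                printf("gpa %#jx len %#zx segid %d prot %#x flags %#x\n",
                    (uintmax_t)gpa, len, segid, prot, flags);
                gpa += len;     /* advance past this mapping */
        }
}
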
> Modified: stable/10/lib/libvmmapi/vmmapi.h
> ==============================================================================
> --- stable/10/lib/libvmmapi/vmmapi.h  Mon Feb  1 14:28:58 2016        (r295123)
> +++ stable/10/lib/libvmmapi/vmmapi.h  Mon Feb  1 14:56:11 2016        (r295124)
> @@ -36,7 +36,7 @@
>   * API version for out-of-tree consumers like grub-bhyve for making compile
>   * time decisions.
>   */
> -#define      VMMAPI_VERSION  0101    /* 2 digit major followed by 2 digit minor */
> +#define      VMMAPI_VERSION  0102    /* 2 digit major followed by 2 digit minor */
>  
>  struct iovec;
>  struct vmctx;
> @@ -52,14 +52,59 @@ enum vm_mmap_style {
>       VM_MMAP_SPARSE,         /* mappings created on-demand */
>  };
>  
> +/*
> + * 'flags' value passed to 'vm_set_memflags()'.
> + */
>  #define      VM_MEM_F_INCORE 0x01    /* include guest memory in core file */
> +#define      VM_MEM_F_WIRED  0x02    /* guest memory is wired */
> +
> +/*
> + * Identifiers for memory segments:
> + * - vm_setup_memory() uses VM_SYSMEM for the system memory segment.
> + * - the remaining identifiers can be used to create devmem segments.
> + */
> +enum {
> +     VM_SYSMEM,
> +     VM_BOOTROM,
> +     VM_FRAMEBUFFER,
> +};
> +
> +/*
> + * Get the length and name of the memory segment identified by 'segid'.
> + * Note that system memory segments are identified with a nul name.
> + *
> + * Returns 0 on success and non-zero otherwise.
> + */
> +int  vm_get_memseg(struct vmctx *ctx, int ident, size_t *lenp, char *name,
> +         size_t namesiz);
> +
> +/*
> + * Iterate over the guest address space. This function finds an address range
> + * that starts at an address >= *gpa.
> + *
> + * Returns 0 if the next address range was found and non-zero otherwise.
> + */
> +int  vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
> +         vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
> +/*
> + * Create a device memory segment identified by 'segid'.
> + *
> + * Returns a pointer to the memory segment on success and MAP_FAILED otherwise.
> + */
> +void *vm_create_devmem(struct vmctx *ctx, int segid, const char *name,
> +         size_t len);
> +
> +/*
> + * Map the memory segment identified by 'segid' into the guest address space
> + * at [gpa,gpa+len) with protection 'prot'.
> + */
> +int  vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid,
> +         vm_ooffset_t segoff, size_t len, int prot);
>  
>  int  vm_create(const char *name);
>  struct vmctx *vm_open(const char *name);
>  void vm_destroy(struct vmctx *ctx);
>  int  vm_parse_memsize(const char *optarg, size_t *memsize);
> -int  vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
> -                       int *wired);
>  int  vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
>  void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
>  int  vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
> @@ -68,6 +113,7 @@ int        vm_gla2gpa(struct vmctx *, int vcpui
>  uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
>  void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
>  void vm_set_memflags(struct vmctx *ctx, int flags);
> +int  vm_get_memflags(struct vmctx *ctx);
>  size_t       vm_get_lowmem_size(struct vmctx *ctx);
>  size_t       vm_get_highmem_size(struct vmctx *ctx);
>  int  vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
> 
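
And the devmem side, which is roughly what the new bootrom.c does: create
a named segment, copy a ROM image into the host mapping it returns, and
map it into the guest so it ends at the 4 GB boundary. Again an untested
sketch; the 1 MB size, the protection bits and the helper itself are only
for illustration, and it assumes imglen <= ROM_SIZE:

#include <sys/param.h>
#include <sys/mman.h>
#include <machine/vmm.h>

#include <string.h>

#include <vmmapi.h>

#define ROM_SIZE        (1024 * 1024UL)

static int
setup_bootrom(struct vmctx *ctx, const void *image, size_t imglen)
{
        char *romptr;
        vm_paddr_t gpa;

        /* Create the segment and get a host mapping of it. */
        romptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", ROM_SIZE);
        if (romptr == MAP_FAILED)
                return (-1);

        /* ROM images conventionally end at the top of the region. */
        memcpy(romptr + ROM_SIZE - imglen, image, imglen);

        /* Map it into the guest so it ends exactly at 4 GB. */
        gpa = 0x100000000UL - ROM_SIZE;
        return (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, ROM_SIZE,
            PROT_READ | PROT_EXEC));
}
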
> Modified: stable/10/share/examples/bhyve/vmrun.sh
> ==============================================================================
> --- stable/10/share/examples/bhyve/vmrun.sh   Mon Feb  1 14:28:58 2016        (r295123)
> +++ stable/10/share/examples/bhyve/vmrun.sh   Mon Feb  1 14:56:11 2016        (r295124)
> @@ -48,8 +48,8 @@ usage() {
>  
>       echo "Usage: vmrun.sh [-ahi] [-c <CPUs>] [-C <console>] [-d <disk file>]"
>       echo "                [-e <name=value>] [-g <gdbport> ] [-H <directory>]"
> -     echo "                [-I <location of installation iso>] [-m <memsize>]"
> -     echo "                [-t <tapdev>] <vmname>"
> +     echo "                [-I <location of installation iso>] [-l <loader>]"
> +     echo "                [-m <memsize>] [-t <tapdev>] <vmname>"
>       echo ""
>       echo "       -h: display this help message"
>       echo "       -a: force memory mapped local APIC access"
> @@ -61,6 +61,7 @@ usage() {
>       echo "       -H: host filesystem to export to the loader"
>       echo "       -i: force boot of the Installation CDROM image"
>       echo "       -I: Installation CDROM image location (default is ${DEFAULT_ISOFILE})"
> +     echo "       -l: the OS loader to use (default is /boot/userboot.so)"
>       echo "       -m: memory size (default is ${DEFAULT_MEMSIZE})"
>       echo "       -p: pass-through a host PCI device at bus/slot/func (e.g. 10/0/0)"
>       echo "       -t: tap device for virtio-net (default is $DEFAULT_TAPDEV)"
> @@ -87,15 +88,15 @@ console=${DEFAULT_CONSOLE}
>  cpus=${DEFAULT_CPUS}
>  tap_total=0
>  disk_total=0
> -apic_opt=""
>  gdbport=0
>  loader_opt=""
> +bhyverun_opt="-H -A -P"
>  pass_total=0
>  
> -while getopts ac:C:d:e:g:hH:iI:m:p:t: c ; do
> +while getopts ac:C:d:e:g:hH:iI:l:m:p:t: c ; do
>       case $c in
>       a)
> -             apic_opt="-a"
> +             bhyverun_opt="${bhyverun_opt} -a"
>               ;;
>       c)
>               cpus=${OPTARG}
> @@ -125,6 +126,9 @@ while getopts ac:C:d:e:g:hH:iI:m:p:t: c 
>       I)
>               isofile=${OPTARG}
>               ;;
> +     l)
> +             loader_opt="${loader_opt} -l ${OPTARG}"
> +             ;;
>       m)
>               memsize=${OPTARG}
>               ;;
> @@ -163,6 +167,12 @@ if [ -n "${host_base}" ]; then
>       loader_opt="${loader_opt} -h ${host_base}"
>  fi
>  
> +# If PCI passthru devices are configured then guest memory must be wired
> +if [ ${pass_total} -gt 0 ]; then
> +     loader_opt="${loader_opt} -S"
> +     bhyverun_opt="${bhyverun_opt} -S"
> +fi
> +
>  make_and_check_diskdev()
>  {
>      local virtio_diskdev="$1"
> @@ -263,7 +273,7 @@ while [ 1 ]; do
>           i=$(($i + 1))
>          done
>  
> -     ${FBSDRUN} -c ${cpus} -m ${memsize} ${apic_opt} -A -H -P        \
> +     ${FBSDRUN} -c ${cpus} -m ${memsize} ${bhyverun_opt}             \
>               -g ${gdbport}                                           \
>               -s 0:0,hostbridge                                       \
>               -s 1:0,lpc                                              \
> 
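
(With the -l plumbing above you can point bhyveload at an alternate
userboot, e.g. something like "sh vmrun.sh -l /scratch/userboot.so -d
disk.img testvm"; the path and VM name here are only examples.)
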
> Modified: stable/10/sys/amd64/include/vmm.h
> ==============================================================================
> --- stable/10/sys/amd64/include/vmm.h Mon Feb  1 14:28:58 2016        (r295123)
> +++ stable/10/sys/amd64/include/vmm.h Mon Feb  1 14:56:11 2016        (r295124)
> @@ -108,7 +108,6 @@ enum x2apic_state {
>  
>  struct vm;
>  struct vm_exception;
> -struct vm_memory_segment;
>  struct seg_desc;
>  struct vm_exit;
>  struct vm_run;
> @@ -175,17 +174,33 @@ int vm_create(const char *name, struct v
>  void vm_destroy(struct vm *vm);
>  int vm_reinit(struct vm *vm);
>  const char *vm_name(struct vm *vm);
> -int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
> +
> +/*
> + * APIs that modify the guest memory map require all vcpus to be frozen.
> + */
> +int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
> +    size_t len, int prot, int flags);
> +int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
> +void vm_free_memseg(struct vm *vm, int ident);
>  int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
>  int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
> -void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
> -               void **cookie);
> +int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
> +int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
> +
> +/*
> + * APIs that inspect the guest memory map require only a *single* vcpu to
> + * be frozen. This acts like a read lock on the guest memory map since any
> + * modification requires *all* vcpus to be frozen.
> + */
> +int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
> +    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
> +int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
> +    struct vm_object **objptr);
> +void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
> +    int prot, void **cookie);
>  void vm_gpa_release(void *cookie);
> -int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
> -           struct vm_memory_segment *seg);
> -int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
> -               vm_offset_t *offset, struct vm_object **object);
> -boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
> +bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
> +
>  int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
>  int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
>  int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
> @@ -302,8 +317,6 @@ vcpu_should_yield(struct vm *vm, int vcp
>  void *vcpu_stats(struct vm *vm, int vcpu);
>  void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
>  struct vmspace *vm_get_vmspace(struct vm *vm);
> -int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
> -int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
>  struct vatpic *vm_atpic(struct vm *vm);
>  struct vatpit *vm_atpit(struct vm *vm);
>  struct vpmtmr *vm_pmtmr(struct vm *vm);
> 
> Modified: stable/10/sys/amd64/include/vmm_dev.h
> ==============================================================================
> --- stable/10/sys/amd64/include/vmm_dev.h     Mon Feb  1 14:28:58 2016        (r295123)
> +++ stable/10/sys/amd64/include/vmm_dev.h     Mon Feb  1 14:56:11 2016        (r295124)
> @@ -34,10 +34,22 @@ void      vmmdev_init(void);
>  int  vmmdev_cleanup(void);
>  #endif
>  
> -struct vm_memory_segment {
> -     vm_paddr_t      gpa;    /* in */
> +struct vm_memmap {
> +     vm_paddr_t      gpa;
> +     int             segid;          /* memory segment */
> +     vm_ooffset_t    segoff;         /* offset into memory segment */
> +     size_t          len;            /* mmap length */
> +     int             prot;           /* RWX */
> +     int             flags;
> +};
> +#define      VM_MEMMAP_F_WIRED       0x01
> +#define      VM_MEMMAP_F_IOMMU       0x02
> +
> +#define      VM_MEMSEG_NAME(m)       ((m)->name[0] != '\0' ? (m)->name : NULL)
> +struct vm_memseg {
> +     int             segid;
>       size_t          len;
> -     int             wired;
> +     char            name[SPECNAMELEN + 1];
>  };
>  
>  struct vm_register {
> @@ -214,10 +226,14 @@ enum {
>       IOCNUM_REINIT = 5,
>  
>       /* memory apis */
> -     IOCNUM_MAP_MEMORY = 10,
> -     IOCNUM_GET_MEMORY_SEG = 11,
> +     IOCNUM_MAP_MEMORY = 10,                 /* deprecated */
> +     IOCNUM_GET_MEMORY_SEG = 11,             /* deprecated */
>       IOCNUM_GET_GPA_PMAP = 12,
>       IOCNUM_GLA2GPA = 13,
> +     IOCNUM_ALLOC_MEMSEG = 14,
> +     IOCNUM_GET_MEMSEG = 15,
> +     IOCNUM_MMAP_MEMSEG = 16,
> +     IOCNUM_MMAP_GETNEXT = 17,
>  
>       /* register/state accessors */
>       IOCNUM_SET_REGISTER = 20,
> @@ -278,10 +294,14 @@ enum {
>       _IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
>  #define      VM_REINIT       \
>       _IO('v', IOCNUM_REINIT)
> -#define      VM_MAP_MEMORY   \
> -     _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
> -#define      VM_GET_MEMORY_SEG \
> -     _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
> +#define      VM_ALLOC_MEMSEG \
> +     _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
> +#define      VM_GET_MEMSEG   \
> +     _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
> +#define      VM_MMAP_MEMSEG  \
> +     _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
> +#define      VM_MMAP_GETNEXT \
> +     _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
>  #define      VM_SET_REGISTER \
>       _IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
>  #define      VM_GET_REGISTER \
> 
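
For out-of-tree consumers that talk to /dev/vmm/<name> directly rather
than through libvmmapi, my understanding of the new two-step model is
roughly the following. Untested sketch; segid 0 corresponds to VM_SYSMEM
in vmmapi.h, and PROT_* is what the library itself passes in the prot
field:

#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>

#include <string.h>

/* 'fd' is an open descriptor for /dev/vmm/<vmname>. */
static int
alloc_and_map_sysmem(int fd, size_t len)
{
        struct vm_memseg memseg;
        struct vm_memmap memmap;

        /* Step 1: create the object backing the system memory segment. */
        memset(&memseg, 0, sizeof(memseg));
        memseg.segid = 0;       /* VM_SYSMEM; sysmem segments are unnamed */
        memseg.len = len;
        if (ioctl(fd, VM_ALLOC_MEMSEG, &memseg) != 0)
                return (-1);

        /* Step 2: map the whole segment at guest physical address 0. */
        memset(&memmap, 0, sizeof(memmap));
        memmap.gpa = 0;
        memmap.segid = 0;
        memmap.segoff = 0;
        memmap.len = len;
        memmap.prot = PROT_READ | PROT_WRITE | PROT_EXEC;
        memmap.flags = 0;
        return (ioctl(fd, VM_MMAP_MEMSEG, &memmap));
}
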
> Modified: stable/10/sys/amd64/vmm/amd/svm.c
> ==============================================================================
> --- stable/10/sys/amd64/vmm/amd/svm.c Mon Feb  1 14:28:58 2016        (r295123)
> +++ stable/10/sys/amd64/vmm/amd/svm.c Mon Feb  1 14:56:11 2016        (r295124)
> @@ -1477,7 +1477,7 @@ svm_vmexit(struct svm_softc *svm_sc, int
>                       VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with "
>                           "reserved bits set: info1(%#lx) info2(%#lx)",
>                           info1, info2);
> -             } else if (vm_mem_allocated(svm_sc->vm, info2)) {
> +             } else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) {
>                       vmexit->exitcode = VM_EXITCODE_PAGING;
>                       vmexit->u.paging.gpa = info2;
>                       vmexit->u.paging.fault_type = npf_fault_type(info1);
> 
> Modified: stable/10/sys/amd64/vmm/intel/vmx.c
> ==============================================================================
> --- stable/10/sys/amd64/vmm/intel/vmx.c       Mon Feb  1 14:28:58 2016        (r295123)
> +++ stable/10/sys/amd64/vmm/intel/vmx.c       Mon Feb  1 14:56:11 2016        (r295124)
> @@ -2426,7 +2426,7 @@ vmx_exit_process(struct vmx *vmx, int vc
>                * this must be an instruction that accesses MMIO space.
>                */
>               gpa = vmcs_gpa();
> -             if (vm_mem_allocated(vmx->vm, gpa) ||
> +             if (vm_mem_allocated(vmx->vm, vcpu, gpa) ||
>                   apic_access_fault(vmx, vcpu, gpa)) {
>                       vmexit->exitcode = VM_EXITCODE_PAGING;
>                       vmexit->inst_length = 0;
> 
> Modified: stable/10/sys/amd64/vmm/io/ppt.c
> ==============================================================================
> --- stable/10/sys/amd64/vmm/io/ppt.c  Mon Feb  1 14:28:58 2016        (r295123)
> +++ stable/10/sys/amd64/vmm/io/ppt.c  Mon Feb  1 14:56:11 2016        (r295124)
> @@ -76,11 +76,17 @@ struct pptintr_arg {                              /* pptintr(pptin
>       uint64_t        msg_data;
>  };
>  
> +struct pptseg {
> +     vm_paddr_t      gpa;
> +     size_t          len;
> +     int             wired;
> +};
> +
>  struct pptdev {
>       device_t        dev;
>       struct vm       *vm;                    /* owner of this device */
>       TAILQ_ENTRY(pptdev)     next;
> -     struct vm_memory_segment mmio[MAX_MMIOSEGS];
> +     struct pptseg mmio[MAX_MMIOSEGS];
>       struct {
>               int     num_msgs;               /* guest state */
>  
> @@ -207,14 +213,14 @@ static void
>  ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
>  {
>       int i;
> -     struct vm_memory_segment *seg;
> +     struct pptseg *seg;
>  
>       for (i = 0; i < MAX_MMIOSEGS; i++) {
>               seg = &ppt->mmio[i];
>               if (seg->len == 0)
>                       continue;
>               (void)vm_unmap_mmio(vm, seg->gpa, seg->len);
> -             bzero(seg, sizeof(struct vm_memory_segment));
> +             bzero(seg, sizeof(struct pptseg));
>       }
>  }
>  
> @@ -324,7 +330,7 @@ ppt_is_mmio(struct vm *vm, vm_paddr_t gp
>  {
>       int i;
>       struct pptdev *ppt;
> -     struct vm_memory_segment *seg;
> +     struct pptseg *seg;
>  
>       TAILQ_FOREACH(ppt, &pptdev_list, next) {
>               if (ppt->vm != vm)
> @@ -410,7 +416,7 @@ ppt_map_mmio(struct vm *vm, int bus, int
>            vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
>  {
>       int i, error;
> -     struct vm_memory_segment *seg;
> +     struct pptseg *seg;
>       struct pptdev *ppt;
>  
>       ppt = ppt_find(bus, slot, func);
> 
> Modified: stable/10/sys/amd64/vmm/vmm.c
> ==============================================================================
> --- stable/10/sys/amd64/vmm/vmm.c     Mon Feb  1 14:28:58 2016        (r295123)
> +++ stable/10/sys/amd64/vmm/vmm.c     Mon Feb  1 14:56:11 2016        (r295124)
> @@ -120,12 +120,21 @@ struct vcpu {
>  #define      vcpu_assert_locked(v)   mtx_assert(&((v)->mtx), MA_OWNED)
>  
>  struct mem_seg {
> +     size_t  len;
> +     bool    sysmem;
> +     struct vm_object *object;
> +};
> +#define      VM_MAX_MEMSEGS  2
> +
> +struct mem_map {
>       vm_paddr_t      gpa;
>       size_t          len;
> -     boolean_t       wired;
> -     vm_object_t     object;
> +     vm_ooffset_t    segoff;
> +     int             segid;
> +     int             prot;
> +     int             flags;
>  };
> -#define      VM_MAX_MEMORY_SEGMENTS  2
> +#define      VM_MAX_MEMMAPS  4
>  
>  /*
>   * Initialization:
> @@ -151,8 +160,8 @@ struct vm {
>       void            *rendezvous_arg;        /* (x) rendezvous func/arg */
>       vm_rendezvous_func_t rendezvous_func;
>       struct mtx      rendezvous_mtx;         /* (o) rendezvous lock */
> -     int             num_mem_segs;           /* (o) guest memory segments */
> -     struct mem_seg  mem_segs[VM_MAX_MEMORY_SEGMENTS];
> +     struct mem_map  mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
> +     struct mem_seg  mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
>       struct vmspace  *vmspace;               /* (o) guest's address space */
>       char            name[VM_MAX_NAMELEN];   /* (o) virtual machine name */
>       struct vcpu     vcpu[VM_MAXCPU];        /* (i) guest vcpus */
> @@ -224,6 +233,8 @@ TUNABLE_INT("hw.vmm.force_iommu", &vmm_f
>  SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
>      "Force use of I/O MMU even if no passthrough devices were found.");
>  
> +static void vm_free_memmap(struct vm *vm, int ident);
> +static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
>  static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
>  
>  #ifdef KTR
> @@ -444,7 +455,6 @@ vm_create(const char *name, struct vm **
>  
>       vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
>       strcpy(vm->name, name);
> -     vm->num_mem_segs = 0;
>       vm->vmspace = vmspace;
>       mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
>  
> @@ -455,18 +465,9 @@ vm_create(const char *name, struct vm **
>  }
>  
>  static void
> -vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
> -{
> -
> -     if (seg->object != NULL)
> -             vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
> -
> -     bzero(seg, sizeof(*seg));
> -}
> -
> -static void
>  vm_cleanup(struct vm *vm, bool destroy)
>  {
> +     struct mem_map *mm;
>       int i;
>  
>       ppt_unassign_all(vm);
> @@ -489,11 +490,23 @@ vm_cleanup(struct vm *vm, bool destroy)
>  
>       VMCLEANUP(vm->cookie);
>  
> -     if (destroy) {
> -             for (i = 0; i < vm->num_mem_segs; i++)
> -                     vm_free_mem_seg(vm, &vm->mem_segs[i]);
> +     /*
> +      * System memory is removed from the guest address space only when
> +      * the VM is destroyed. This is because the mapping remains the same
> +      * across VM reset.
> +      *
> +      * Device memory can be relocated by the guest (e.g. using PCI BARs)
> +      * so those mappings are removed on a VM reset.
> +      */
> +     for (i = 0; i < VM_MAX_MEMMAPS; i++) {
> +             mm = &vm->mem_maps[i];
> +             if (destroy || !sysmem_mapping(vm, mm))
> +                     vm_free_memmap(vm, i);
> +     }
>  
> -             vm->num_mem_segs = 0;
> +     if (destroy) {
> +             for (i = 0; i < VM_MAX_MEMSEGS; i++)
> +                     vm_free_memseg(vm, i);
>  
>               VMSPACE_FREE(vm->vmspace);
>               vm->vmspace = NULL;
> @@ -551,146 +564,243 @@ vm_unmap_mmio(struct vm *vm, vm_paddr_t 
>       return (0);
>  }
>  
> -boolean_t
> -vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
> +/*
> + * Return 'true' if 'gpa' is allocated in the guest address space.
> + *
> + * This function is called in the context of a running vcpu which acts as
> + * an implicit lock on 'vm->mem_maps[]'.
> + */
> +bool
> +vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
>  {
> +     struct mem_map *mm;
>       int i;
> -     vm_paddr_t gpabase, gpalimit;
>  
> -     for (i = 0; i < vm->num_mem_segs; i++) {
> -             gpabase = vm->mem_segs[i].gpa;
> -             gpalimit = gpabase + vm->mem_segs[i].len;
> -             if (gpa >= gpabase && gpa < gpalimit)
> -                     return (TRUE);          /* 'gpa' is regular memory */
> +#ifdef INVARIANTS
> +     int hostcpu, state;
> +     state = vcpu_get_state(vm, vcpuid, &hostcpu);
> +     KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
> +         ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
> +#endif
> +
> +     for (i = 0; i < VM_MAX_MEMMAPS; i++) {
> +             mm = &vm->mem_maps[i];
> +             if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
> +                     return (true);          /* 'gpa' is sysmem or devmem */
>       }
>  
>       if (ppt_is_mmio(vm, gpa))
> -             return (TRUE);                  /* 'gpa' is pci passthru mmio */
> +             return (true);                  /* 'gpa' is pci passthru mmio */
>  
> -     return (FALSE);
> +     return (false);
>  }
>  
>  int
> -vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
> +vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
>  {
> -     int available, allocated;
>       struct mem_seg *seg;
> -     vm_object_t object;
> -     vm_paddr_t g;
> +     vm_object_t obj;
>  
> -     if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
> +     if (ident < 0 || ident >= VM_MAX_MEMSEGS)
>               return (EINVAL);
> -     
> -     available = allocated = 0;
> -     g = gpa;
> -     while (g < gpa + len) {
> -             if (vm_mem_allocated(vm, g))
> -                     allocated++;
> -             else
> -                     available++;
>  
> -             g += PAGE_SIZE;
> -     }
> -
> -     /*
> -      * If there are some allocated and some available pages in the address
> -      * range then it is an error.
> -      */
> -     if (allocated && available)
> +     if (len == 0 || (len & PAGE_MASK))
>               return (EINVAL);
>  
> -     /*
> -      * If the entire address range being requested has already been
> -      * allocated then there isn't anything more to do.
> -      */
> -     if (allocated && available == 0)
> -             return (0);
> -
> -     if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
> -             return (E2BIG);
> -
> -     seg = &vm->mem_segs[vm->num_mem_segs];
> +     seg = &vm->mem_segs[ident];
> +     if (seg->object != NULL) {
> +             if (seg->len == len && seg->sysmem == sysmem)
> +                     return (EEXIST);
> +             else
> +                     return (EINVAL);
> +     }
>  
> -     if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
> +     obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
> +     if (obj == NULL)
>               return (ENOMEM);
>  
> -     seg->gpa = gpa;
>       seg->len = len;
> -     seg->object = object;
> -     seg->wired = FALSE;
> +     seg->object = obj;
> +     seg->sysmem = sysmem;
> +     return (0);
> +}
>  
> -     vm->num_mem_segs++;
> +int
> +vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
> +    vm_object_t *objptr)
> +{
> +     struct mem_seg *seg;
> +
> +     if (ident < 0 || ident >= VM_MAX_MEMSEGS)
> +             return (EINVAL);
>  
> +     seg = &vm->mem_segs[ident];
> +     if (len)
> +             *len = seg->len;
> +     if (sysmem)
> +             *sysmem = seg->sysmem;
> +     if (objptr)
> +             *objptr = seg->object;
>       return (0);
>  }
>  
> -static vm_paddr_t
> -vm_maxmem(struct vm *vm)
> +void
> +vm_free_memseg(struct vm *vm, int ident)
>  {
> -     int i;
> -     vm_paddr_t gpa, maxmem;
> +     struct mem_seg *seg;
>  
> -     maxmem = 0;
> 
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
> _______________________________________________
> svn-src-...@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-all
> To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


-- 
Homepage:  www.yamagi.org
XMPP:      yam...@yamagi.org
GnuPG/GPG: 0xEFBCCBCB
_______________________________________________
svn-src-stable-10@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-stable-10
To unsubscribe, send any mail to "svn-src-stable-10-unsubscr...@freebsd.org"
