From: Jagannathan Raman <jag.ra...@oracle.com> Add support for per-region info fds. Unlike kernel vfio, vfio-user can have a separate fd to support mmap() of individual regions; add ->use_regfds as needed to support this difference.
Originally-by: John Johnson <john.g.john...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> Signed-off-by: John Levon <john.le...@nutanix.com> --- hw/vfio-user/common.c | 68 +++++++++++++++++++++++++++++++++++ hw/vfio-user/common.h | 2 ++ hw/vfio-user/pci.c | 2 ++ hw/vfio-user/protocol.h | 14 ++++++++ hw/vfio-user/trace-events | 1 + hw/vfio/ap.c | 2 ++ hw/vfio/ccw.c | 2 ++ hw/vfio/container.c | 7 ++++ hw/vfio/helpers.c | 28 +++++++++++++-- hw/vfio/pci.c | 2 ++ hw/vfio/platform.c | 2 ++ include/hw/vfio/vfio-common.h | 5 ++- 12 files changed, 131 insertions(+), 4 deletions(-) diff --git a/hw/vfio-user/common.c b/hw/vfio-user/common.c index 55d1da4e51..7b96d715cb 100644 --- a/hw/vfio-user/common.c +++ b/hw/vfio-user/common.c @@ -1106,3 +1106,71 @@ int vfio_user_get_info(VFIOUserProxy *proxy, struct vfio_device_info *info) return 0; } + +static int vfio_user_get_region_info(VFIOUserProxy *proxy, + struct vfio_region_info *info, + VFIOUserFDs *fds) +{ + g_autofree VFIOUserRegionInfo *msgp = NULL; + uint32_t size; + + /* data returned can be larger than vfio_region_info */ + if (info->argsz < sizeof(*info)) { + error_printf("vfio_user_get_region_info argsz too small\n"); + return -E2BIG; + } + if (fds != NULL && fds->send_fds != 0) { + error_printf("vfio_user_get_region_info can't send FDs\n"); + return -EINVAL; + } + + size = info->argsz + sizeof(VFIOUserHdr); + msgp = g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO, + sizeof(*msgp), 0); + msgp->argsz = info->argsz; + msgp->index = info->index; + + vfio_user_send_wait(proxy, &msgp->hdr, fds, size); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } + trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size); + + memcpy(info, &msgp->argsz, info->argsz); + return 0; +} + + +/* + * Socket-based io_ops + */ + +static int vfio_user_io_get_region_info(VFIODevice *vbasedev, + struct vfio_region_info *info, + int *fd) +{ + int ret; + VFIOUserFDs fds = { 0, 1, fd}; + + ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds); + if (ret) { + return ret; + } + + if (info->index > vbasedev->num_regions) { + return -EINVAL; + } + /* cap_offset in valid area */ + if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) && + (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) { + return -EINVAL; + } + + return 0; +} + +VFIODeviceIO vfio_dev_io_sock = { + .get_region_info = vfio_user_io_get_region_info, +}; diff --git a/hw/vfio-user/common.h b/hw/vfio-user/common.h index 11a80d4e7c..30a3125ea3 100644 --- a/hw/vfio-user/common.h +++ b/hw/vfio-user/common.h @@ -95,4 +95,6 @@ void vfio_user_set_handler(VFIODevice *vbasedev, bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp); int vfio_user_get_info(VFIOUserProxy *proxy, struct vfio_device_info *info); +extern VFIODeviceIO vfio_dev_io_sock; + #endif /* VFIO_USER_COMMON_H */ diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c index 69806eb863..fdff6c99e6 100644 --- a/hw/vfio-user/pci.c +++ b/hw/vfio-user/pci.c @@ -108,6 +108,8 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) vbasedev->ops = &vfio_user_pci_ops; vbasedev->type = VFIO_DEVICE_TYPE_PCI; vbasedev->dev = DEVICE(vdev); + vbasedev->io = &vfio_dev_io_sock; + vbasedev->use_regfds = true; /* * vfio-user devices are effectively mdevs (don't use a host iommu). diff --git a/hw/vfio-user/protocol.h b/hw/vfio-user/protocol.h index 5f9ef1768f..6f70a48905 100644 --- a/hw/vfio-user/protocol.h +++ b/hw/vfio-user/protocol.h @@ -125,4 +125,18 @@ typedef struct { uint32_t num_irqs; } VFIOUserDeviceInfo; +/* + * VFIO_USER_DEVICE_GET_REGION_INFO + * imported from struct vfio_region_info + */ +typedef struct { + VFIOUserHdr hdr; + uint32_t argsz; + uint32_t flags; + uint32_t index; + uint32_t cap_offset; + uint64_t size; + uint64_t offset; +} VFIOUserRegionInfo; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio-user/trace-events b/hw/vfio-user/trace-events index 6b06a3ed82..1860430301 100644 --- a/hw/vfio-user/trace-events +++ b/hw/vfio-user/trace-events @@ -7,3 +7,4 @@ vfio_user_recv_request(uint16_t cmd) " command 0x%x" vfio_user_send_write(uint16_t id, int wrote) " id 0x%x wrote 0x%x" vfio_user_version(uint16_t major, uint16_t minor, const char *caps) " major %d minor %d caps: %s" vfio_user_get_info(uint32_t nregions, uint32_t nirqs) " #regions %d #irqs %d" +vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) " index %d flags 0x%x size 0x%"PRIx64 diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 1adce1ab40..54b1815f1d 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -162,6 +162,8 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) return; } + vbasedev->use_regfds = false; + if (!vfio_attach_device(vbasedev->name, vbasedev, &address_space_memory, errp)) { goto error; diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 8c16648819..085a3fc6e6 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -586,6 +586,8 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) goto out_unrealize; } + vbasedev->use_regfds = false; + if (!vfio_attach_device(cdev->mdevid, vbasedev, &address_space_memory, errp)) { goto out_attach_dev_err; diff --git a/hw/vfio/container.c b/hw/vfio/container.c index 0e1af34ce4..ddb86edb65 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -890,10 +890,17 @@ void vfio_put_base_device(VFIODevice *vbasedev) int i; for (i = 0; i < vbasedev->num_regions; i++) { + if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) { + close(vbasedev->regfds[i]); + } g_free(vbasedev->regions[i]); } g_free(vbasedev->regions); vbasedev->regions = NULL; + if (vbasedev->regfds != NULL) { + g_free(vbasedev->regfds); + vbasedev->regfds = NULL; + } } if (!vbasedev->group) { diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index bef1540295..4ed393420e 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -365,6 +365,12 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, region->size = info->size; region->fd_offset = info->offset; region->nr = index; + if (vbasedev->regfds != NULL) { + region->fd = vbasedev->regfds[index]; + } else { + region->fd = vbasedev->fd; + } + if (region->size) { region->mem = g_new0(MemoryRegion, 1); @@ -443,7 +449,7 @@ int vfio_region_mmap(VFIORegion *region) region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot, MAP_SHARED | MAP_FIXED, - region->vbasedev->fd, + region->fd, region->fd_offset + region->mmaps[i].offset); if (region->mmaps[i].mmap == MAP_FAILED) { @@ -568,12 +574,16 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info) { size_t argsz = sizeof(struct vfio_region_info); + int fd = -1; int ret; /* create region cache */ if (vbasedev->regions == NULL) { vbasedev->regions = g_new0(struct vfio_region_info *, vbasedev->num_regions); + if (vbasedev->use_regfds) { + vbasedev->regfds = g_new0(int, vbasedev->num_regions); + } } /* check cache */ if (vbasedev->regions[index] != NULL) { @@ -587,22 +597,33 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index, retry: (*info)->argsz = argsz; - ret = vbasedev->io->get_region_info(vbasedev, *info); + ret = vbasedev->io->get_region_info(vbasedev, *info, &fd); if (ret != 0) { g_free(*info); *info = NULL; + if (vbasedev->regfds != NULL) { + vbasedev->regfds[index] = -1; + } + return ret; } if ((*info)->argsz > argsz) { argsz = (*info)->argsz; *info = g_realloc(*info, argsz); + if (fd != -1) { + close(fd); + fd = -1; + } goto retry; } /* fill cache */ vbasedev->regions[index] = *info; + if (vbasedev->regfds != NULL) { + vbasedev->regfds[index] = fd; + } return 0; } @@ -775,10 +796,11 @@ static int vfio_io_device_feature(VFIODevice *vbasedev, } static int vfio_io_get_region_info(VFIODevice *vbasedev, - struct vfio_region_info *info) + struct vfio_region_info *info, int *fd) { int ret; + *fd = -1; ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); return ret < 0 ? -errno : ret; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index a7084a7690..4ab3bfa2fa 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3175,6 +3175,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) name = g_strdup(vbasedev->name); } + vbasedev->use_regfds = false; + if (!vfio_attach_device(name, vbasedev, pci_device_iommu_address_space(pdev), errp)) { goto error; diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 51534fd941..a76cc6a194 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -575,6 +575,8 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) VFIODevice *vbasedev = &vdev->vbasedev; int i; + vbasedev->use_regfds = false; + qemu_mutex_init(&vdev->intp_mutex); trace_vfio_platform_realize(vbasedev->sysfsdev ? diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 699e8a9376..ab7fa7135a 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -59,6 +59,7 @@ typedef struct VFIORegion { uint32_t nr_mmaps; VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ + int fd; /* fd to mmap() region */ } VFIORegion; typedef struct VFIOMigration { @@ -139,6 +140,7 @@ typedef struct VFIODevice { bool ram_block_discard_allowed; OnOffAuto enable_migration; bool migration_events; + bool use_regfds; VFIODeviceOps *ops; VFIODeviceIO *io; unsigned int num_irqs; @@ -158,6 +160,7 @@ typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) hwpt_next; VFIOUserProxy *proxy; struct vfio_region_info **regions; + int *regfds; } VFIODevice; struct VFIODeviceOps { @@ -202,7 +205,7 @@ struct VFIODeviceOps { struct VFIODeviceIO { int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *); int (*get_region_info)(VFIODevice *vdev, - struct vfio_region_info *info); + struct vfio_region_info *info, int *fd); int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, -- 2.34.1