For vfio-user, device operations such as IRQ handling and region reads/writes are implemented in userspace over the control socket rather than via ioctl() calls to the kernel vfio driver. Add an ops vector to generalize this, and implement vfio_device_io_ops_ioctl for interacting with the kernel vfio driver.
Originally-by: John Johnson <john.g.john...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> Signed-off-by: John Levon <john.le...@nutanix.com> --- hw/vfio/container-base.c | 6 +-- hw/vfio/device.c | 77 ++++++++++++++++++++++++++++++----- hw/vfio/listener.c | 13 +++--- hw/vfio/pci.c | 10 ++--- include/hw/vfio/vfio-device.h | 38 +++++++++++++++++ 5 files changed, 117 insertions(+), 27 deletions(-) diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 3ff473a45c..1c6ca94b60 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -198,11 +198,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT; - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { - return -errno; - } - - return 0; + return vbasedev->io_ops->device_feature(vbasedev, feature); } static int vfio_container_iommu_query_dirty_bitmap(const VFIOContainerBase *bcontainer, diff --git a/hw/vfio/device.c b/hw/vfio/device.c index 5d837092cb..468fb50eac 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -82,7 +82,7 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index) .count = 0, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } void vfio_device_irq_unmask(VFIODevice *vbasedev, int index) @@ -95,7 +95,7 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index) .count = 1, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } void vfio_device_irq_mask(VFIODevice *vbasedev, int index) @@ -108,7 +108,7 @@ void vfio_device_irq_mask(VFIODevice *vbasedev, int index) .count = 1, }; - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); + vbasedev->io_ops->set_irqs(vbasedev, &irq_set); } static inline const char *action_to_str(int action) @@ -155,6 +155,7 @@ 
bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex int argsz; const char *name; int32_t *pfd; + int ret; argsz = sizeof(*irq_set) + sizeof(*pfd); @@ -167,7 +168,9 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex pfd = (int32_t *)&irq_set->data; *pfd = fd; - if (!ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { + ret = vbasedev->io_ops->set_irqs(vbasedev, irq_set); + + if (!ret) { return true; } @@ -188,22 +191,19 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, struct vfio_irq_info *info) { - int ret; - memset(info, 0, sizeof(*info)); info->argsz = sizeof(*info); info->index = index; - ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, info); - - return ret < 0 ? -errno : ret; + return vbasedev->io_ops->get_irq_info(vbasedev, info); } int vfio_device_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info) { size_t argsz = sizeof(struct vfio_region_info); + int ret; *info = g_malloc0(argsz); @@ -211,10 +211,11 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index, retry: (*info)->argsz = argsz; - if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { + ret = vbasedev->io_ops->get_region_info(vbasedev, *info); + if (ret != 0) { g_free(*info); *info = NULL; - return -errno; + return ret; } if ((*info)->argsz > argsz) { @@ -320,11 +321,14 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) vbasedev->fd = fd; } +static VFIODeviceIOOps vfio_device_io_ops_ioctl; + void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, DeviceState *dev, bool ram_discard) { vbasedev->type = type; vbasedev->ops = ops; + vbasedev->io_ops = &vfio_device_io_ops_ioctl; vbasedev->dev = dev; vbasedev->fd = -1; @@ -442,3 +446,54 @@ void vfio_device_unprepare(VFIODevice *vbasedev) QLIST_REMOVE(vbasedev, global_next); vbasedev->bcontainer = 
NULL; } + +/* + * Traditional ioctl() based io + */ + +static int vfio_device_io_device_feature(VFIODevice *vbasedev, + struct vfio_device_feature *feature) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_get_region_info(VFIODevice *vbasedev, + struct vfio_region_info *info) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_get_irq_info(VFIODevice *vbasedev, + struct vfio_irq_info *info) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, info); + + return ret < 0 ? -errno : ret; +} + +static int vfio_device_io_set_irqs(VFIODevice *vbasedev, + struct vfio_irq_set *irqs) +{ + int ret; + + ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irqs); + + return ret < 0 ? -errno : ret; +} + +static VFIODeviceIOOps vfio_device_io_ops_ioctl = { + .device_feature = vfio_device_io_device_feature, + .get_region_info = vfio_device_io_get_region_info, + .get_irq_info = vfio_device_io_get_irq_info, + .set_irqs = vfio_device_io_set_irqs, +}; diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c index e7ade7d62e..2b93ca55b6 100644 --- a/hw/vfio/listener.c +++ b/hw/vfio/listener.c @@ -794,13 +794,17 @@ static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer) VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + int ret; + if (!vbasedev->dirty_tracking) { continue; } - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + ret = vbasedev->io_ops->device_feature(vbasedev, feature); + + if (ret != 0) { warn_report("%s: Failed to stop DMA logging, err %d (%s)", - vbasedev->name, -errno, strerror(errno)); + vbasedev->name, -ret, strerror(-ret)); } vbasedev->dirty_tracking = false; } @@ -901,10 +905,9 @@ static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer, continue; } - ret = 
ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + ret = vbasedev->io_ops->device_feature(vbasedev, feature); if (ret) { - ret = -errno; - error_setg_errno(errp, errno, "%s: Failed to start DMA logging", + error_setg_errno(errp, -ret, "%s: Failed to start DMA logging", vbasedev->name); goto out; } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 8455010d62..bbf95215cc 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -381,7 +381,7 @@ static void vfio_msi_interrupt(void *opaque) static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) { g_autofree struct vfio_irq_set *irq_set = NULL; - int ret = 0, argsz; + int argsz; int32_t *fd; argsz = sizeof(*irq_set) + sizeof(*fd); @@ -396,9 +396,7 @@ static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev) fd = (int32_t *)&irq_set->data; *fd = -1; - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); - - return ret < 0 ? -errno : ret; + return vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set); } static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) @@ -455,11 +453,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) fds[i] = fd; } - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); + ret = vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set); g_free(irq_set); - return ret < 0 ? 
-errno : ret; + return ret; } static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index 5b833868c9..e89ed02c0e 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -41,6 +41,7 @@ enum { }; typedef struct VFIODeviceOps VFIODeviceOps; +typedef struct VFIODeviceIOOps VFIODeviceIOOps; typedef struct VFIOMigration VFIOMigration; typedef struct IOMMUFDBackend IOMMUFDBackend; @@ -66,6 +67,7 @@ typedef struct VFIODevice { OnOffAuto migration_multifd_transfer; bool migration_events; VFIODeviceOps *ops; + VFIODeviceIOOps *io_ops; unsigned int num_irqs; unsigned int num_regions; unsigned int flags; @@ -141,6 +143,42 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; extern VFIODeviceList vfio_device_list; #ifdef CONFIG_LINUX +/* + * How devices communicate with the server. The default option is through + * ioctl() to the kernel VFIO driver, but vfio-user can use a socket to a remote + * process. + */ +struct VFIODeviceIOOps { + /** + * @device_feature + * + * Fill in feature info for the given device. + */ + int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *); + + /** + * @get_region_info + * + * Fill in @info with information on the region given by @info->index. + */ + int (*get_region_info)(VFIODevice *vdev, + struct vfio_region_info *info); + + /** + * @get_irq_info + * + * Fill in @irq with information on the IRQ given by @irq->index. + */ + int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); + + /** + * @set_irqs + * + * Configure IRQs as defined by @irqs. + */ + int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); +}; + int vfio_device_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info); int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, -- 2.43.0