This gives management tools like libvirt a chance to open the vfio cdev with privilege and pass the FD to qemu. This way qemu never needs the privilege to open a VFIO or iommu cdev node itself.
Together with the earlier support for pre-opening the /dev/iommu device, we now have full support for passing a vfio device to an unprivileged qemu from a management tool. This mode is no longer considered for the legacy backend, so let's remove the "TODO" comment. Add a helper function vfio_device_get_name() to check the fd and get the device name; it will also be used by other vfio devices. There is no easy way to check whether a device is an mdev with FD passing, so fail the x-balloon-allowed check unconditionally in this case. There is also no easy way to get the BDF as the name with FD passing, so we fake a name of the form VFIO_FD[fd]. Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> --- include/hw/vfio/vfio-common.h | 1 + hw/vfio/helpers.c | 33 +++++++++++++++++++++++++++++ hw/vfio/iommufd.c | 12 +++++++---- hw/vfio/pci.c | 40 ++++++++++++++++++++++++----------- 4 files changed, 70 insertions(+), 16 deletions(-) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 3f1a39a991..854c32e4ce 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -250,6 +250,7 @@ struct vfio_info_cap_header * vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id); struct vfio_info_cap_header * vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id); +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); #endif bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 168847e7c5..044dbbc501 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -609,3 +609,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) return ret; } + +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) +{ + struct stat st; + + if (vbasedev->fd < 0) { + if (stat(vbasedev->sysfsdev, &st) < 0) { + error_setg_errno(errp, errno, "no such host device"); + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); + return -errno; + } + /* User may specify a name, e.g: 
VFIO platform device */ + if (!vbasedev->name) { + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); + } + } +#ifdef CONFIG_IOMMUFD + else { + if (!vbasedev->iommufd) { + error_setg(errp, "Use FD passing only with iommufd backend"); + return -EINVAL; + } + /* + * Give a name with fd so any function printing out vbasedev->name + * will not break. + */ + if (!vbasedev->name) { + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + } + } +#endif + return 0; +} diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index aedfe31c3c..1fb1c7e853 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -399,11 +399,15 @@ static int iommufd_attach_device(const char *name, VFIODevice *vbasedev, uint32_t ioas_id; Error *err = NULL; - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); - if (devfd < 0) { - return devfd; + if (vbasedev->fd < 0) { + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); + if (devfd < 0) { + return devfd; + } + vbasedev->fd = devfd; + } else { + devfd = vbasedev->fd; } - vbasedev->fd = devfd; ret = iommufd_connect_and_bind(vbasedev, errp); if (ret) { diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 7a6696ca55..d8f658ea47 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -43,6 +43,7 @@ #include "migration/blocker.h" #include "migration/qemu-file.h" #include "sysemu/iommufd.h" +#include "monitor/monitor.h" #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" @@ -3108,18 +3109,23 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) VFIODevice *vbasedev = &vdev->vbasedev; char *tmp, *subsys; Error *err = NULL; - struct stat st; int i, ret; bool is_mdev; char uuid[UUID_STR_LEN]; char *name; - if (!vbasedev->sysfsdev) { + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { error_setg(errp, "No provided host device"); +#ifdef CONFIG_IOMMUFD + error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, " + "-device 
vfio-pci,sysfsdev=PATH_TO_DEVICE " + "or -device vfio-pci,fd=DEVICE_FD\n"); +#else error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); +#endif return; } vbasedev->sysfsdev = @@ -3128,13 +3134,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vdev->host.slot, vdev->host.function); } - if (stat(vbasedev->sysfsdev, &st) < 0) { - error_setg_errno(errp, errno, "no such host device"); - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); + if (vfio_device_get_name(vbasedev, errp)) { return; } - - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); vbasedev->ops = &vfio_pci_ops; vbasedev->type = VFIO_DEVICE_TYPE_PCI; vbasedev->dev = DEVICE(vdev); @@ -3494,6 +3496,7 @@ static void vfio_instance_init(Object *obj) vdev->host.bus = ~0U; vdev->host.slot = ~0U; vdev->host.function = ~0U; + vdev->vbasedev.fd = -1; vdev->nv_gpudirect_clique = 0xFF; @@ -3547,11 +3550,6 @@ static Property vfio_pci_dev_properties[] = { qdev_prop_nv_gpudirect_clique, uint8_t), DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo, OFF_AUTOPCIBAR_OFF), - /* - * TODO - support passed fds... is this necessary? 
- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), - */ #ifdef CONFIG_IOMMUFD DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), @@ -3559,6 +3557,21 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +#ifdef CONFIG_IOMMUFD +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) +{ + VFIOPCIDevice *vdev = VFIO_PCI(obj); + int fd = -1; + + fd = monitor_fd_param(monitor_cur(), str, errp); + if (fd == -1) { + error_prepend(errp, "Could not parse remote object fd %s:", str); + return; + } + vdev->vbasedev.fd = fd; +} +#endif + static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -3566,6 +3579,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) dc->reset = vfio_pci_reset; device_class_set_props(dc, vfio_pci_dev_properties); +#ifdef CONFIG_IOMMUFD + object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); +#endif dc->desc = "VFIO-based PCI device assignment"; set_bit(DEVICE_CATEGORY_MISC, dc->categories); pdc->realize = vfio_realize; -- 2.34.1