>-----Original Message----- >From: Cédric Le Goater <c...@redhat.com> >Sent: Friday, November 10, 2023 6:53 PM >Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by passing >a >file handle > >On 11/9/23 12:45, Zhenzhong Duan wrote: >> This gives management tools like libvirt a chance to open the vfio >> cdev with privilege and pass FD to qemu. This way qemu never needs >> to have privilege to open a VFIO or iommu cdev node. >> >> Together with the earlier support of pre-opening /dev/iommu device, >> now we have full support of passing a vfio device to unprivileged >> qemu by management tool. This mode is no more considered for the >> legacy backend. So let's remove the "TODO" comment. >> >> Add a helper function vfio_device_get_name() to check fd and get >> device name, it will also be used by other vfio devices. >> >> There is no easy way to check if a device is mdev with FD passing, >> so fail the x-balloon-allowed check unconditionally in this case. >> >> There is also no easy way to get BDF as name with FD passing, so >> we fake a name by VFIO_FD[fd]. >> >> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >> --- >> include/hw/vfio/vfio-common.h | 1 + >> hw/vfio/helpers.c | 34 +++++++++++++++++++++++++++++ >> hw/vfio/iommufd.c | 12 +++++++---- >> hw/vfio/pci.c | 40 ++++++++++++++++++++++++----------- >> 4 files changed, 71 insertions(+), 16 deletions(-) >> >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index 3dac5c167e..960a14e8d8 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -238,6 +238,7 @@ struct vfio_info_cap_header * >> vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id); >> struct vfio_info_cap_header * >> vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id); >> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); >> #endif >> >> bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); >> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c >> index 168847e7c5..d80aa58719 100644 >> --- a/hw/vfio/helpers.c >> +++ b/hw/vfio/helpers.c >> @@ -20,6 +20,7 @@ >> */ >> >> #include "qemu/osdep.h" >> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ >> #include <sys/ioctl.h> >> >> #include "hw/vfio/vfio-common.h" >> @@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int >region, uint16_t cap_type) >> >> return ret; >> } >> + >> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) >> +{ >> + struct stat st; >> + >> + if (vbasedev->fd < 0) { >> + if (stat(vbasedev->sysfsdev, &st) < 0) { >> + error_setg_errno(errp, errno, "no such host device"); >> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); >> + return -errno; >> + } >> + /* User may specify a name, e.g: VFIO platform device */ >> + if (!vbasedev->name) { >> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); >> + } >> + } >> +#ifdef CONFIG_IOMMUFD >> + else { >> + if (!vbasedev->iommufd) { > > >Can we handle with this case without CONFIG_IOMMUFD, simply by >testing vbasedev->iommufd ?
Sure, will do. > >> + error_setg(errp, "Use FD passing only with iommufd backend"); >> + return -EINVAL; >> + } >> + /* >> + * Give a name with fd so any function printing out vbasedev->name >> + * will not break. >> + */ >> + if (!vbasedev->name) { >> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); >> + } >> + } >> +#endif >> + return 0; >> +} >> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c >> index 44dc6848bf..fd30477275 100644 >> --- a/hw/vfio/iommufd.c >> +++ b/hw/vfio/iommufd.c >> @@ -326,11 +326,15 @@ static int iommufd_attach_device(const char *name, >VFIODevice *vbasedev, >> uint32_t ioas_id; >> Error *err = NULL; >> >> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); >> - if (devfd < 0) { >> - return devfd; >> + if (vbasedev->fd < 0) { >> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); >> + if (devfd < 0) { >> + return devfd; >> + } >> + vbasedev->fd = devfd; >> + } else { >> + devfd = vbasedev->fd; >> } >> - vbasedev->fd = devfd; >> >> ret = iommufd_connect_and_bind(vbasedev, errp); >> if (ret) { >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index e9a426200b..f95725ed16 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -44,6 +44,7 @@ >> #include "migration/blocker.h" >> #include "migration/qemu-file.h" >> #include "sysemu/iommufd.h" >> +#include "monitor/monitor.h" >> >> #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" >> >> @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error >**errp) >> VFIODevice *vbasedev = &vdev->vbasedev; >> char *tmp, *subsys; >> Error *err = NULL; >> - struct stat st; >> int i, ret; >> bool is_mdev; >> char uuid[UUID_STR_LEN]; >> char *name; >> >> - if (!vbasedev->sysfsdev) { >> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { >> if (!(~vdev->host.domain || ~vdev->host.bus || >> ~vdev->host.slot || ~vdev->host.function)) { >> error_setg(errp, "No provided host device"); >> +#ifdef CONFIG_IOMMUFD >> + error_append_hint(errp, "Use -device >> vfio-pci,host=DDDD:BB:DD.F, " >> + "-device vfio-pci,sysfsdev=PATH_TO_DEVICE " >> + "or -device vfio-pci,fd=DEVICE_FD\n"); >> +#else >> error_append_hint(errp, "Use -device >> vfio-pci,host=DDDD:BB:DD.F " >> "or -device >> vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); >> +#endif > >or simply : > > > error_append_hint(errp, "Use -device > vfio-pci,host=DDDD:BB:DD.F " > +#ifdef CONFIG_IOMMUFD > + "or -device vfio-pci,fd=DEVICE_FD " > +#endif > "or -device > vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); Good idea, will do. > > > >> return; >> } >> vbasedev->sysfsdev = >> @@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error >**errp) >> vdev->host.slot, vdev->host.function); >> } >> >> - if (stat(vbasedev->sysfsdev, &st) < 0) { >> - error_setg_errno(errp, errno, "no such host device"); >> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); >> + if (vfio_device_get_name(vbasedev, errp)) { >> return; >> } >> - >> - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); >> vbasedev->ops = &vfio_pci_ops; >> vbasedev->type = VFIO_DEVICE_TYPE_PCI; >> vbasedev->dev = DEVICE(vdev); >> @@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj) >> vdev->host.bus = ~0U; >> vdev->host.slot = ~0U; >> vdev->host.function = ~0U; >> + vdev->vbasedev.fd = -1; >We should probably move the all VFIODevice initializations : > > vbasedev->ops = &vfio_pci_ops; > vbasedev->type = VFIO_DEVICE_TYPE_PCI; > vbasedev->dev = DEVICE(vdev); > >under vfio_instance_init (should be called vfio_pci_instance_init). > >This is true for all other VFIO devices. May be not for this series, >it can come later. Sure, Let me send a separate series addressing this. > > >> >> vdev->nv_gpudirect_clique = 0xFF; >> >> @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = { >> qdev_prop_nv_gpudirect_clique, uint8_t), >> DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, >msix_relo, >> OFF_AUTOPCIBAR_OFF), >> - /* >> - * TODO - support passed fds... is this necessary? >> - * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), >> - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), >> - */ >> #ifdef CONFIG_IOMMUFD >> DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, >> TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), >> @@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = { >> DEFINE_PROP_END_OF_LIST(), >> }; >> >> +#ifdef CONFIG_IOMMUFD >> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) >> +{ >> + VFIOPCIDevice *vdev = VFIO_PCI(obj); >> + int fd = -1; >> + >> + fd = monitor_fd_param(monitor_cur(), str, errp); >> + if (fd == -1) { >> + error_prepend(errp, "Could not parse remote object fd %s:", str); >> + return; >> + } >> + vdev->vbasedev.fd = fd; > >We could introduce a common helper in hw/vfio/common.c to remove code >duplication : > >#ifdef CONFIG_IOMMUFD >static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) >{ > vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); >} >#endif Good suggestions! Will do. Thanks Zhenzhong