>-----Original Message----- >From: Cédric Le Goater <c...@redhat.com> >Sent: Monday, November 13, 2023 7:08 PM >Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by passing >a >file handle > >On 11/13/23 04:00, Duan, Zhenzhong wrote: >> >> >>> -----Original Message----- >>> From: Cédric Le Goater <c...@redhat.com> >>> Sent: Friday, November 10, 2023 6:53 PM >>> Subject: Re: [PATCH v5 10/20] vfio/pci: Make vfio cdev pre-openable by >passing a >>> file handle >>> >>> On 11/9/23 12:45, Zhenzhong Duan wrote: >>>> This gives management tools like libvirt a chance to open the vfio >>>> cdev with privilege and pass FD to qemu. This way qemu never needs >>>> to have privilege to open a VFIO or iommu cdev node. >>>> >>>> Together with the earlier support of pre-opening /dev/iommu device, >>>> now we have full support of passing a vfio device to unprivileged >>>> qemu by management tool. This mode is no more considered for the >>>> legacy backend. So let's remove the "TODO" comment. >>>> >>>> Add a helper function vfio_device_get_name() to check fd and get >>>> device name, it will also be used by other vfio devices. >>>> >>>> There is no easy way to check if a device is mdev with FD passing, >>>> so fail the x-balloon-allowed check unconditionally in this case. >>>> >>>> There is also no easy way to get BDF as name with FD passing, so >>>> we fake a name by VFIO_FD[fd]. >>>> >>>> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >>>> --- >>>> include/hw/vfio/vfio-common.h | 1 + >>>> hw/vfio/helpers.c | 34 +++++++++++++++++++++++++++++ >>>> hw/vfio/iommufd.c | 12 +++++++---- >>>> hw/vfio/pci.c | 40 ++++++++++++++++++++++++----------- >>>> 4 files changed, 71 insertions(+), 16 deletions(-) >>>> >>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio- >common.h >>>> index 3dac5c167e..960a14e8d8 100644 >>>> --- a/include/hw/vfio/vfio-common.h >>>> +++ b/include/hw/vfio/vfio-common.h >>>> @@ -238,6 +238,7 @@ struct vfio_info_cap_header * >>>> vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id); >>>> struct vfio_info_cap_header * >>>> vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id); >>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); >>>> #endif >>>> >>>> bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); >>>> diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c >>>> index 168847e7c5..d80aa58719 100644 >>>> --- a/hw/vfio/helpers.c >>>> +++ b/hw/vfio/helpers.c >>>> @@ -20,6 +20,7 @@ >>>> */ >>>> >>>> #include "qemu/osdep.h" >>>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ >>>> #include <sys/ioctl.h> >>>> >>>> #include "hw/vfio/vfio-common.h" >>>> @@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, >int >>> region, uint16_t cap_type) >>>> >>>> return ret; >>>> } >>>> + >>>> +int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) >>>> +{ >>>> + struct stat st; >>>> + >>>> + if (vbasedev->fd < 0) { >>>> + if (stat(vbasedev->sysfsdev, &st) < 0) { >>>> + error_setg_errno(errp, errno, "no such host device"); >>>> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); >>>> + return -errno; >>>> + } >>>> + /* User may specify a name, e.g: VFIO platform device */ >>>> + if (!vbasedev->name) { >>>> + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); >>>> + } >>>> + } >>>> +#ifdef CONFIG_IOMMUFD >>>> + else { >>>> + if (!vbasedev->iommufd) { >>> >>> >>> Can we handle with this case without CONFIG_IOMMUFD, simply by >>> testing vbasedev->iommufd ? >> >> Sure, will do. >> >>> >>>> + error_setg(errp, "Use FD passing only with iommufd backend"); >>>> + return -EINVAL; >>>> + } >>>> + /* >>>> + * Give a name with fd so any function printing out vbasedev->name >>>> + * will not break. >>>> + */ >>>> + if (!vbasedev->name) { >>>> + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); >>>> + } >>>> + } >>>> +#endif >>>> + return 0; >>>> +} >>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c >>>> index 44dc6848bf..fd30477275 100644 >>>> --- a/hw/vfio/iommufd.c >>>> +++ b/hw/vfio/iommufd.c >>>> @@ -326,11 +326,15 @@ static int iommufd_attach_device(const char >*name, >>> VFIODevice *vbasedev, >>>> uint32_t ioas_id; >>>> Error *err = NULL; >>>> >>>> - devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); >>>> - if (devfd < 0) { >>>> - return devfd; >>>> + if (vbasedev->fd < 0) { >>>> + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); >>>> + if (devfd < 0) { >>>> + return devfd; >>>> + } >>>> + vbasedev->fd = devfd; >>>> + } else { >>>> + devfd = vbasedev->fd; >>>> } >>>> - vbasedev->fd = devfd; >>>> >>>> ret = iommufd_connect_and_bind(vbasedev, errp); >>>> if (ret) { >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >>>> index e9a426200b..f95725ed16 100644 >>>> --- a/hw/vfio/pci.c >>>> +++ b/hw/vfio/pci.c >>>> @@ -44,6 +44,7 @@ >>>> #include "migration/blocker.h" >>>> #include "migration/qemu-file.h" >>>> #include "sysemu/iommufd.h" >>>> +#include "monitor/monitor.h" >>>> >>>> #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" >>>> >>>> @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error >>> **errp) >>>> VFIODevice *vbasedev = &vdev->vbasedev; >>>> char *tmp, *subsys; >>>> Error *err = NULL; >>>> - struct stat st; >>>> int i, ret; >>>> bool is_mdev; >>>> char uuid[UUID_STR_LEN]; >>>> char *name; >>>> >>>> - if (!vbasedev->sysfsdev) { >>>> + if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { >>>> if (!(~vdev->host.domain || ~vdev->host.bus || >>>> ~vdev->host.slot || ~vdev->host.function)) { >>>> error_setg(errp, "No provided host device"); >>>> +#ifdef CONFIG_IOMMUFD >>>> + error_append_hint(errp, "Use -device >>>> vfio-pci,host=DDDD:BB:DD.F, " >>>> + "-device vfio-pci,sysfsdev=PATH_TO_DEVICE " >>>> + "or -device vfio-pci,fd=DEVICE_FD\n"); >>>> +#else >>>> error_append_hint(errp, "Use -device >>>> vfio-pci,host=DDDD:BB:DD.F " >>>> "or -device >>>> vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); >>>> +#endif >>> >>> or simply : >>> >>> >>> error_append_hint(errp, "Use -device >>> vfio-pci,host=DDDD:BB:DD.F " >>> +#ifdef CONFIG_IOMMUFD >>> + "or -device vfio-pci,fd=DEVICE_FD " >>> +#endif >>> "or -device >>> vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); >> >> Good idea, will do. >> >>> >>> >>> >>>> return; >>>> } >>>> vbasedev->sysfsdev = >>>> @@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error >>> **errp) >>>> vdev->host.slot, vdev->host.function); >>>> } >>>> >>>> - if (stat(vbasedev->sysfsdev, &st) < 0) { >>>> - error_setg_errno(errp, errno, "no such host device"); >>>> - error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); >>>> + if (vfio_device_get_name(vbasedev, errp)) { >>>> return; >>>> } >>>> - >>>> - vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); >>>> vbasedev->ops = &vfio_pci_ops; >>>> vbasedev->type = VFIO_DEVICE_TYPE_PCI; >>>> vbasedev->dev = DEVICE(vdev); >>>> @@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj) >>>> vdev->host.bus = ~0U; >>>> vdev->host.slot = ~0U; >>>> vdev->host.function = ~0U; >>>> + vdev->vbasedev.fd = -1; >>> We should probably move the all VFIODevice initializations : >>> >>> vbasedev->ops = &vfio_pci_ops; >>> vbasedev->type = VFIO_DEVICE_TYPE_PCI; >>> vbasedev->dev = DEVICE(vdev); >>> >>> under vfio_instance_init (should be called vfio_pci_instance_init). >>> >>> This is true for all other VFIO devices. May be not for this series, >>> it can come later. >> >> Sure, Let me send a separate series addressing this. >> >>> >>> >>>> >>>> vdev->nv_gpudirect_clique = 0xFF; >>>> >>>> @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = { >>>> qdev_prop_nv_gpudirect_clique, >>>> uint8_t), >>>> DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, >>> msix_relo, >>>> OFF_AUTOPCIBAR_OFF), >>>> - /* >>>> - * TODO - support passed fds... is this necessary? >>>> - * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), >>>> - * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, >vfiogroupfd_name), >>>> - */ >>>> #ifdef CONFIG_IOMMUFD >>>> DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, >>>> TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), >>>> @@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = { >>>> DEFINE_PROP_END_OF_LIST(), >>>> }; >>>> >>>> +#ifdef CONFIG_IOMMUFD >>>> +static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) >>>> +{ >>>> + VFIOPCIDevice *vdev = VFIO_PCI(obj); >>>> + int fd = -1; >>>> + >>>> + fd = monitor_fd_param(monitor_cur(), str, errp); >>>> + if (fd == -1) { >>>> + error_prepend(errp, "Could not parse remote object fd %s:", str); >>>> + return; >>>> + } >>>> + vdev->vbasedev.fd = fd; >>> >>> We could introduce a common helper in hw/vfio/common.c to remove code >>> duplication : >>> >>> #ifdef CONFIG_IOMMUFD >>> static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) >>> { >>> vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); >>> } >>> #endif >> >> Good suggestions! Will do. > > >The idead is to isolate the iommufd addition in some common extension. >I tried with a QOM interface but it is not really satifying. See >previous email. I am not loosing faith though to find a solution for >this multi inheritance issue with VFIO devices
I know, always keep this in mind😊 Thanks Zhenzhong