On 11/9/23 12:45, Zhenzhong Duan wrote:
This gives management tools like libvirt a chance to open the vfio
cdev with privilege and pass FD to qemu. This way qemu never needs
to have privilege to open a VFIO or iommu cdev node.

Together with the earlier support of pre-opening /dev/iommu device,
now we have full support of passing a vfio device to unprivileged
qemu by management tool. This mode is no more considered for the
legacy backend. So let's remove the "TODO" comment.

Add a helper function vfio_device_get_name() to check fd and get
device name, it will also be used by other vfio devices.

There is no easy way to check if a device is mdev with FD passing,
so fail the x-balloon-allowed check unconditionally in this case.

There is also no easy way to get BDF as name with FD passing, so
we fake a name by VFIO_FD[fd].

Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
---
  include/hw/vfio/vfio-common.h |  1 +
  hw/vfio/helpers.c             | 34 +++++++++++++++++++++++++++++
  hw/vfio/iommufd.c             | 12 +++++++----
  hw/vfio/pci.c                 | 40 ++++++++++++++++++++++++-----------
  4 files changed, 71 insertions(+), 16 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 3dac5c167e..960a14e8d8 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -238,6 +238,7 @@ struct vfio_info_cap_header *
  vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
  struct vfio_info_cap_header *
  vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
  #endif
bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 168847e7c5..d80aa58719 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -20,6 +20,7 @@
   */
#include "qemu/osdep.h"
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
  #include <sys/ioctl.h>
#include "hw/vfio/vfio-common.h"
@@ -609,3 +610,36 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, 
uint16_t cap_type)
return ret;
  }
+
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
+{
+    struct stat st;
+
+    if (vbasedev->fd < 0) {
+        if (stat(vbasedev->sysfsdev, &st) < 0) {
+            error_setg_errno(errp, errno, "no such host device");
+            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+            return -errno;
+        }
+        /* User may specify a name, e.g: VFIO platform device */
+        if (!vbasedev->name) {
+            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
+        }
+    }
+#ifdef CONFIG_IOMMUFD
+    else {
+        if (!vbasedev->iommufd) {


Can we handle with this case without CONFIG_IOMMUFD, simply by
testing vbasedev->iommufd ?

+            error_setg(errp, "Use FD passing only with iommufd backend");
+            return -EINVAL;
+        }
+        /*
+         * Give a name with fd so any function printing out vbasedev->name
+         * will not break.
+         */
+        if (!vbasedev->name) {
+            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+        }
+    }
+#endif
+    return 0;
+}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 44dc6848bf..fd30477275 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -326,11 +326,15 @@ static int iommufd_attach_device(const char *name, 
VFIODevice *vbasedev,
      uint32_t ioas_id;
      Error *err = NULL;
- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
-    if (devfd < 0) {
-        return devfd;
+    if (vbasedev->fd < 0) {
+        devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
+        if (devfd < 0) {
+            return devfd;
+        }
+        vbasedev->fd = devfd;
+    } else {
+        devfd = vbasedev->fd;
      }
-    vbasedev->fd = devfd;
ret = iommufd_connect_and_bind(vbasedev, errp);
      if (ret) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e9a426200b..f95725ed16 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -44,6 +44,7 @@
  #include "migration/blocker.h"
  #include "migration/qemu-file.h"
  #include "sysemu/iommufd.h"
+#include "monitor/monitor.h"
#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" @@ -2934,18 +2935,23 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
      VFIODevice *vbasedev = &vdev->vbasedev;
      char *tmp, *subsys;
      Error *err = NULL;
-    struct stat st;
      int i, ret;
      bool is_mdev;
      char uuid[UUID_STR_LEN];
      char *name;
- if (!vbasedev->sysfsdev) {
+    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
          if (!(~vdev->host.domain || ~vdev->host.bus ||
                ~vdev->host.slot || ~vdev->host.function)) {
              error_setg(errp, "No provided host device");
+#ifdef CONFIG_IOMMUFD
+            error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F, "
+                              "-device vfio-pci,sysfsdev=PATH_TO_DEVICE "
+                              "or -device vfio-pci,fd=DEVICE_FD\n");
+#else
              error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
                                "or -device 
vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
+#endif

or simply :


               error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
 +#ifdef CONFIG_IOMMUFD
 +                              "or -device vfio-pci,fd=DEVICE_FD "
 +#endif
                                 "or -device 
vfio-pci,sysfsdev=PATH_TO_DEVICE\n");



              return;
          }
          vbasedev->sysfsdev =
@@ -2954,13 +2960,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
                              vdev->host.slot, vdev->host.function);
      }
- if (stat(vbasedev->sysfsdev, &st) < 0) {
-        error_setg_errno(errp, errno, "no such host device");
-        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
+    if (vfio_device_get_name(vbasedev, errp)) {
          return;
      }
-
-    vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
      vbasedev->ops = &vfio_pci_ops;
      vbasedev->type = VFIO_DEVICE_TYPE_PCI;
      vbasedev->dev = DEVICE(vdev);
@@ -3320,6 +3322,7 @@ static void vfio_instance_init(Object *obj)
      vdev->host.bus = ~0U;
      vdev->host.slot = ~0U;
      vdev->host.function = ~0U;
+    vdev->vbasedev.fd = -1;
We should probably move the all VFIODevice initializations :

    vbasedev->ops = &vfio_pci_ops;
    vbasedev->type = VFIO_DEVICE_TYPE_PCI;
    vbasedev->dev = DEVICE(vdev);

under vfio_instance_init (should be called vfio_pci_instance_init).

This is true for all other VFIO devices. May be not for this series,
it can come later.


vdev->nv_gpudirect_clique = 0xFF; @@ -3373,11 +3376,6 @@ static Property vfio_pci_dev_properties[] = {
                                     qdev_prop_nv_gpudirect_clique, uint8_t),
      DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
                                  OFF_AUTOPCIBAR_OFF),
-    /*
-     * TODO - support passed fds... is this necessary?
-     * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
-     * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
-     */
  #ifdef CONFIG_IOMMUFD
      DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
                       TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
@@ -3385,6 +3383,21 @@ static Property vfio_pci_dev_properties[] = {
      DEFINE_PROP_END_OF_LIST(),
  };
+#ifdef CONFIG_IOMMUFD
+static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
+{
+    VFIOPCIDevice *vdev = VFIO_PCI(obj);
+    int fd = -1;
+
+    fd = monitor_fd_param(monitor_cur(), str, errp);
+    if (fd == -1) {
+        error_prepend(errp, "Could not parse remote object fd %s:", str);
+        return;
+    }
+    vdev->vbasedev.fd = fd;

We could introduce a common helper in hw/vfio/common.c to remove code
duplication :

#ifdef CONFIG_IOMMUFD
static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
{
    vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
}
#endif
Thanks,

C.



+}
+#endif
+
  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
  {
      DeviceClass *dc = DEVICE_CLASS(klass);
@@ -3392,6 +3405,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, 
void *data)
dc->reset = vfio_pci_reset;
      device_class_set_props(dc, vfio_pci_dev_properties);
+#ifdef CONFIG_IOMMUFD
+    object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
+#endif
      dc->desc = "VFIO-based PCI device assignment";
      set_bit(DEVICE_CATEGORY_MISC, dc->categories);
      pdc->realize = vfio_realize;


Reply via email to