On 5/29/25 21:24, Steve Sistare wrote:
At vfio creation time, save the values of the vfio container, group, and
device descriptors in CPR state.  On qemu restart, vfio_realize() finds and
uses the saved descriptors.

During reuse, device and iommu state is already configured, so operations
in vfio_realize() that would modify the configuration, such as vfio ioctls,
are skipped.  The result is that vfio_realize() constructs qemu data
structures that reflect the current state of the device.

Signed-off-by: Steve Sistare <steven.sist...@oracle.com>

Thanks for making the changes. They look much better in container.c.

Reviewed-by: Cédric Le Goater <c...@redhat.com>

C.


---
  include/hw/vfio/vfio-cpr.h |  6 +++++
  hw/vfio/container.c        | 67 +++++++++++++++++++++++++++++++++++-----------
  hw/vfio/cpr-legacy.c       | 42 +++++++++++++++++++++++++++++
  3 files changed, 100 insertions(+), 15 deletions(-)

diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
index d4e0bd5..5a2e5f6 100644
--- a/include/hw/vfio/vfio-cpr.h
+++ b/include/hw/vfio/vfio-cpr.h
@@ -13,6 +13,7 @@
struct VFIOContainer;
  struct VFIOContainerBase;
+struct VFIOGroup;
typedef struct VFIOContainerCPR {
      Error *blocker;
@@ -30,4 +31,9 @@ bool vfio_cpr_register_container(struct VFIOContainerBase 
*bcontainer,
                                   Error **errp);
  void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
+int vfio_cpr_group_get_device_fd(int d, const char *name);
+
+bool vfio_cpr_container_match(struct VFIOContainer *container,
+                              struct VFIOGroup *group, int fd);
+
  #endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 7d2035c..798abda 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -31,6 +31,8 @@
  #include "system/reset.h"
  #include "trace.h"
  #include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
  #include "pci.h"
  #include "hw/vfio/vfio-container.h"
  #include "hw/vfio/vfio-cpr.h"
@@ -426,7 +428,12 @@ static VFIOContainer *vfio_create_container(int fd, 
VFIOGroup *group,
          return NULL;
      }
- if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
+    /*
+     * During CPR, just set the container type and skip the ioctls, as the
+     * container and group are already configured in the kernel.
+     */
+    if (!cpr_is_incoming() &&
+        !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
          return NULL;
      }
@@ -593,6 +600,11 @@ static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group,
      group->container = container;
      QLIST_INSERT_HEAD(&container->group_list, group, container_next);
      vfio_group_add_kvm_device(group);
+    /*
+     * Remember the container fd for each group, so we can attach to the same
+     * container after CPR.
+     */
+    cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
      return true;
  }
@@ -602,6 +614,7 @@ static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group)
      group->container = NULL;
      vfio_group_del_kvm_device(group);
      vfio_ram_block_discard_disable(container, false);
+    cpr_delete_fd("vfio_container_for_group", group->groupid);
  }
static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
@@ -616,17 +629,34 @@ static bool vfio_container_connect(VFIOGroup *group, 
AddressSpace *as,
      bool group_was_added = false;
space = vfio_address_space_get(as);
+    fd = cpr_find_fd("vfio_container_for_group", group->groupid);
- QLIST_FOREACH(bcontainer, &space->containers, next) {
-        container = container_of(bcontainer, VFIOContainer, bcontainer);
-        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
-            return vfio_container_group_add(container, group, errp);
+    if (!cpr_is_incoming()) {
+        QLIST_FOREACH(bcontainer, &space->containers, next) {
+            container = container_of(bcontainer, VFIOContainer, bcontainer);
+            if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+                return vfio_container_group_add(container, group, errp);
+            }
          }
-    }
- fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
-    if (fd < 0) {
-        goto fail;
+        fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
+        if (fd < 0) {
+            goto fail;
+        }
+    } else {
+        /*
+         * For incoming CPR, the group is already attached in the kernel.
+         * If a container with matching fd is found, then update the
+         * userland group list and return.  If not, then after the loop,
+         * create the container struct and group list.
+         */
+        QLIST_FOREACH(bcontainer, &space->containers, next) {
+            container = container_of(bcontainer, VFIOContainer, bcontainer);
+
+            if (vfio_cpr_container_match(container, group, fd)) {
+                return vfio_container_group_add(container, group, errp);
+            }
+        }
      }
ret = ioctl(fd, VFIO_GET_API_VERSION);
@@ -698,6 +728,7 @@ static void vfio_container_disconnect(VFIOGroup *group)
QLIST_REMOVE(group, container_next);
      group->container = NULL;
+    cpr_delete_fd("vfio_container_for_group", group->groupid);
/*
       * Explicitly release the listener first before unset container,
@@ -751,7 +782,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace 
*as, Error **errp)
      group = g_malloc0(sizeof(*group));
snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
-    group->fd = qemu_open(path, O_RDWR, errp);
+    group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp);
      if (group->fd < 0) {
          goto free_group_exit;
      }
@@ -783,6 +814,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace 
*as, Error **errp)
      return group;
close_fd_exit:
+    cpr_delete_fd("vfio_group", groupid);
      close(group->fd);
free_group_exit:
@@ -804,6 +836,7 @@ static void vfio_group_put(VFIOGroup *group)
      vfio_container_disconnect(group);
      QLIST_REMOVE(group, next);
      trace_vfio_group_put(group->fd);
+    cpr_delete_fd("vfio_group", group->groupid);
      close(group->fd);
      g_free(group);
  }
@@ -814,7 +847,7 @@ static bool vfio_device_get(VFIOGroup *group, const char 
*name,
      g_autofree struct vfio_device_info *info = NULL;
      int fd;
- fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
+    fd = vfio_cpr_group_get_device_fd(group->fd, name);
      if (fd < 0) {
          error_setg_errno(errp, errno, "error getting device from group %d",
                           group->groupid);
@@ -827,8 +860,7 @@ static bool vfio_device_get(VFIOGroup *group, const char 
*name,
      info = vfio_get_device_info(fd);
      if (!info) {
          error_setg_errno(errp, errno, "error getting device info");
-        close(fd);
-        return false;
+        goto fail;
      }
/*
@@ -842,8 +874,7 @@ static bool vfio_device_get(VFIOGroup *group, const char 
*name,
          if (!QLIST_EMPTY(&group->device_list)) {
              error_setg(errp, "Inconsistent setting of support for discarding "
                         "RAM (e.g., balloon) within group");
-            close(fd);
-            return false;
+            goto fail;
          }
if (!group->ram_block_discard_allowed) {
@@ -861,6 +892,11 @@ static bool vfio_device_get(VFIOGroup *group, const char 
*name,
      trace_vfio_device_get(name, info->flags, info->num_regions, 
info->num_irqs);
return true;
+
+fail:
+    close(fd);
+    cpr_delete_fd(name, 0);
+    return false;
  }
static void vfio_device_put(VFIODevice *vbasedev)
@@ -871,6 +907,7 @@ static void vfio_device_put(VFIODevice *vbasedev)
      QLIST_REMOVE(vbasedev, next);
      vbasedev->group = NULL;
      trace_vfio_device_put(vbasedev->fd);
+    cpr_delete_fd(vbasedev->name, 0);
      close(vbasedev->fd);
  }
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
index 419b9fb..29be64f 100644
--- a/hw/vfio/cpr-legacy.c
+++ b/hw/vfio/cpr-legacy.c
@@ -9,6 +9,7 @@
  #include "qemu/osdep.h"
  #include "hw/vfio/vfio-container.h"
  #include "hw/vfio/vfio-cpr.h"
+#include "hw/vfio/vfio-device.h"
  #include "migration/blocker.h"
  #include "migration/cpr.h"
  #include "migration/migration.h"
@@ -67,3 +68,44 @@ void vfio_legacy_cpr_unregister_container(VFIOContainer 
*container)
      migrate_del_blocker(&container->cpr.blocker);
      vmstate_unregister(NULL, &vfio_container_vmstate, container);
  }
+
+int vfio_cpr_group_get_device_fd(int d, const char *name)
+{
+    const int id = 0;
+    int fd = cpr_find_fd(name, id);
+
+    if (fd < 0) {
+        fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
+        if (fd >= 0) {
+            cpr_save_fd(name, id, fd);
+        }
+    }
+    return fd;
+}
+
+static bool same_device(int fd1, int fd2)
+{
+    struct stat st1, st2;
+
+    return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
+}
+
+bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
+                              int fd)
+{
+    if (container->fd == fd) {
+        return true;
+    }
+    if (!same_device(container->fd, fd)) {
+        return false;
+    }
+    /*
+     * Same device, different fd: the container fd was cpr_save'd once per
+     * groupid, so SCM_RIGHTS produced duplicates.  De-dup by closing the
+     * extra fd and re-saving container->fd under this groupid.
+     */
+    cpr_delete_fd("vfio_container_for_group", group->groupid);
+    close(fd);
+    cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
+    return true;
+}


Reply via email to