>-----Original Message-----
>From: Steve Sistare <steven.sist...@oracle.com>

>Subject: [PATCH V4 10/43] vfio/container: preserve descriptors
>
>At vfio creation time, save the values of the vfio container, group, and
>device descriptors in CPR state.  On qemu restart, vfio_realize() finds and
>uses the saved descriptors.
>
>During reuse, device and iommu state is already configured, so operations
>in vfio_realize() that would modify the configuration, such as vfio ioctls,
>are skipped.  The result is that vfio_realize() constructs qemu data
>structures that reflect the current state of the device.
>
>Signed-off-by: Steve Sistare <steven.sist...@oracle.com>

Reviewed-by: Zhenzhong Duan <zhenzhong.d...@intel.com>

>---
> include/hw/vfio/vfio-cpr.h |  6 +++++
> hw/vfio/container.c        | 67 +++++++++++++++++++++++++++++++++++-----------
> hw/vfio/cpr-legacy.c       | 42 +++++++++++++++++++++++++++++
> 3 files changed, 100 insertions(+), 15 deletions(-)
>
>diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
>index d4e0bd5..5a2e5f6 100644
>--- a/include/hw/vfio/vfio-cpr.h
>+++ b/include/hw/vfio/vfio-cpr.h
>@@ -13,6 +13,7 @@
>
> struct VFIOContainer;
> struct VFIOContainerBase;
>+struct VFIOGroup;
>
> typedef struct VFIOContainerCPR {
>     Error *blocker;
>@@ -30,4 +31,9 @@ bool vfio_cpr_register_container(struct VFIOContainerBase
>*bcontainer,
>                                  Error **errp);
> void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
>
>+int vfio_cpr_group_get_device_fd(int d, const char *name);
>+
>+bool vfio_cpr_container_match(struct VFIOContainer *container,
>+                              struct VFIOGroup *group, int fd);
>+
> #endif /* HW_VFIO_VFIO_CPR_H */
>diff --git a/hw/vfio/container.c b/hw/vfio/container.c
>index 7d2035c..798abda 100644
>--- a/hw/vfio/container.c
>+++ b/hw/vfio/container.c
>@@ -31,6 +31,8 @@
> #include "system/reset.h"
> #include "trace.h"
> #include "qapi/error.h"
>+#include "migration/cpr.h"
>+#include "migration/blocker.h"
> #include "pci.h"
> #include "hw/vfio/vfio-container.h"
> #include "hw/vfio/vfio-cpr.h"
>@@ -426,7 +428,12 @@ static VFIOContainer *vfio_create_container(int fd,
>VFIOGroup *group,
>         return NULL;
>     }
>
>-    if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
>+    /*
>+     * During CPR, just set the container type and skip the ioctls, as the
>+     * container and group are already configured in the kernel.
>+     */
>+    if (!cpr_is_incoming() &&
>+        !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
>         return NULL;
>     }
>
>@@ -593,6 +600,11 @@ static bool vfio_container_group_add(VFIOContainer
>*container, VFIOGroup *group,
>     group->container = container;
>     QLIST_INSERT_HEAD(&container->group_list, group, container_next);
>     vfio_group_add_kvm_device(group);
>+    /*
>+     * Remember the container fd for each group, so we can attach to the same
>+     * container after CPR.
>+     */
>+    cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
>     return true;
> }
>
>@@ -602,6 +614,7 @@ static void vfio_container_group_del(VFIOContainer
>*container, VFIOGroup *group)
>     group->container = NULL;
>     vfio_group_del_kvm_device(group);
>     vfio_ram_block_discard_disable(container, false);
>+    cpr_delete_fd("vfio_container_for_group", group->groupid);
> }
>
> static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
>@@ -616,17 +629,34 @@ static bool vfio_container_connect(VFIOGroup *group,
>AddressSpace *as,
>     bool group_was_added = false;
>
>     space = vfio_address_space_get(as);
>+    fd = cpr_find_fd("vfio_container_for_group", group->groupid);
>
>-    QLIST_FOREACH(bcontainer, &space->containers, next) {
>-        container = container_of(bcontainer, VFIOContainer, bcontainer);
>-        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
>-            return vfio_container_group_add(container, group, errp);
>+    if (!cpr_is_incoming()) {
>+        QLIST_FOREACH(bcontainer, &space->containers, next) {
>+            container = container_of(bcontainer, VFIOContainer, bcontainer);
>+            if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
>+                return vfio_container_group_add(container, group, errp);
>+            }
>         }
>-    }
>
>-    fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
>-    if (fd < 0) {
>-        goto fail;
>+        fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
>+        if (fd < 0) {
>+            goto fail;
>+        }
>+    } else {
>+        /*
>+         * For incoming CPR, the group is already attached in the kernel.
>+         * If a container with matching fd is found, then update the
>+         * userland group list and return.  If not, then after the loop,
>+         * create the container struct and group list.
>+         */
>+        QLIST_FOREACH(bcontainer, &space->containers, next) {
>+            container = container_of(bcontainer, VFIOContainer, bcontainer);
>+
>+            if (vfio_cpr_container_match(container, group, fd)) {
>+                return vfio_container_group_add(container, group, errp);
>+            }
>+        }
>     }
>
>     ret = ioctl(fd, VFIO_GET_API_VERSION);
>@@ -698,6 +728,7 @@ static void vfio_container_disconnect(VFIOGroup *group)
>
>     QLIST_REMOVE(group, container_next);
>     group->container = NULL;
>+    cpr_delete_fd("vfio_container_for_group", group->groupid);
>
>     /*
>      * Explicitly release the listener first before unset container,
>@@ -751,7 +782,7 @@ static VFIOGroup *vfio_group_get(int groupid,
>AddressSpace *as, Error **errp)
>     group = g_malloc0(sizeof(*group));
>
>     snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
>-    group->fd = qemu_open(path, O_RDWR, errp);
>+    group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp);
>     if (group->fd < 0) {
>         goto free_group_exit;
>     }
>@@ -783,6 +814,7 @@ static VFIOGroup *vfio_group_get(int groupid,
>AddressSpace *as, Error **errp)
>     return group;
>
> close_fd_exit:
>+    cpr_delete_fd("vfio_group", groupid);
>     close(group->fd);
>
> free_group_exit:
>@@ -804,6 +836,7 @@ static void vfio_group_put(VFIOGroup *group)
>     vfio_container_disconnect(group);
>     QLIST_REMOVE(group, next);
>     trace_vfio_group_put(group->fd);
>+    cpr_delete_fd("vfio_group", group->groupid);
>     close(group->fd);
>     g_free(group);
> }
>@@ -814,7 +847,7 @@ static bool vfio_device_get(VFIOGroup *group, const
>char *name,
>     g_autofree struct vfio_device_info *info = NULL;
>     int fd;
>
>-    fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
>+    fd = vfio_cpr_group_get_device_fd(group->fd, name);
>     if (fd < 0) {
>         error_setg_errno(errp, errno, "error getting device from group %d",
>                          group->groupid);
>@@ -827,8 +860,7 @@ static bool vfio_device_get(VFIOGroup *group, const
>char *name,
>     info = vfio_get_device_info(fd);
>     if (!info) {
>         error_setg_errno(errp, errno, "error getting device info");
>-        close(fd);
>-        return false;
>+        goto fail;
>     }
>
>     /*
>@@ -842,8 +874,7 @@ static bool vfio_device_get(VFIOGroup *group, const
>char *name,
>         if (!QLIST_EMPTY(&group->device_list)) {
>             error_setg(errp, "Inconsistent setting of support for discarding "
>                        "RAM (e.g., balloon) within group");
>-            close(fd);
>-            return false;
>+            goto fail;
>         }
>
>         if (!group->ram_block_discard_allowed) {
>@@ -861,6 +892,11 @@ static bool vfio_device_get(VFIOGroup *group, const
>char *name,
>     trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs);
>
>     return true;
>+
>+fail:
>+    close(fd);
>+    cpr_delete_fd(name, 0);
>+    return false;
> }
>
> static void vfio_device_put(VFIODevice *vbasedev)
>@@ -871,6 +907,7 @@ static void vfio_device_put(VFIODevice *vbasedev)
>     QLIST_REMOVE(vbasedev, next);
>     vbasedev->group = NULL;
>     trace_vfio_device_put(vbasedev->fd);
>+    cpr_delete_fd(vbasedev->name, 0);
>     close(vbasedev->fd);
> }
>
>diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
>index 419b9fb..29be64f 100644
>--- a/hw/vfio/cpr-legacy.c
>+++ b/hw/vfio/cpr-legacy.c
>@@ -9,6 +9,7 @@
> #include "qemu/osdep.h"
> #include "hw/vfio/vfio-container.h"
> #include "hw/vfio/vfio-cpr.h"
>+#include "hw/vfio/vfio-device.h"
> #include "migration/blocker.h"
> #include "migration/cpr.h"
> #include "migration/migration.h"
>@@ -67,3 +68,44 @@ void vfio_legacy_cpr_unregister_container(VFIOContainer
>*container)
>     migrate_del_blocker(&container->cpr.blocker);
>     vmstate_unregister(NULL, &vfio_container_vmstate, container);
> }
>+
>+int vfio_cpr_group_get_device_fd(int d, const char *name)
>+{
>+    const int id = 0;
>+    int fd = cpr_find_fd(name, id);
>+
>+    if (fd < 0) {
>+        fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
>+        if (fd >= 0) {
>+            cpr_save_fd(name, id, fd);
>+        }
>+    }
>+    return fd;
>+}
>+
>+static bool same_device(int fd1, int fd2)
>+{
>+    struct stat st1, st2;
>+
>+    return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
>+}
>+
>+bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
>+                              int fd)
>+{
>+    if (container->fd == fd) {
>+        return true;
>+    }
>+    if (!same_device(container->fd, fd)) {
>+        return false;
>+    }
>+    /*
>+     * Same device, different fd.  This occurs when the container fd is
>+     * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
>+     * produces duplicates.  De-dup it.
>+     */
>+    cpr_delete_fd("vfio_container_for_group", group->groupid);
>+    close(fd);
>+    cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
>+    return true;
>+}
>--
>1.8.3.1


Reply via email to