cpr-transfer will use the device name as a key to look up the device's file descriptor in the new QEMU process. However, if the descriptor is specified by a command-line fd parameter, then vfio_device_get_name creates a name that includes the fd number. This causes a chicken-and-egg problem: the new QEMU must already know the fd number in order to construct the name that it needs to find the fd number.
To fix, create an invariant name based on the id command-line parameter. If id is not defined, add a CPR blocker. Signed-off-by: Steve Sistare <steven.sist...@oracle.com> --- hw/vfio/cpr.c | 21 +++++++++++++++++++++ hw/vfio/helpers.c | 10 ++++------ hw/vfio/iommufd.c | 2 ++ include/hw/vfio/vfio-cpr.h | 4 ++++ 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c index a2400ca..e3ea2bf 100644 --- a/hw/vfio/cpr.c +++ b/hw/vfio/cpr.c @@ -11,6 +11,7 @@ #include "hw/vfio/pci.h" #include "hw/pci/msix.h" #include "hw/pci/msi.h" +#include "migration/blocker.h" #include "migration/cpr.h" #include "qapi/error.h" #include "system/runstate.h" @@ -184,3 +185,23 @@ const VMStateDescription vfio_cpr_pci_vmstate = { VMSTATE_END_OF_LIST() } }; + +bool vfio_cpr_set_device_name(VFIODevice *vbasedev, Error **errp) +{ + if (vbasedev->dev->id) { + vbasedev->name = g_strdup(vbasedev->dev->id); + return true; + } else { + /* + * Assign a name so any function printing it will not break, but the + * fd number changes across processes, so this cannot be used as an + * invariant name for CPR. 
+ */ + vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + error_setg(&vbasedev->cpr.id_blocker, + "vfio device with fd=%d needs an id property", + vbasedev->fd); + return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1) == 0; + } +} diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 4b255d4..4ff794c 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -29,6 +29,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/units.h" +#include "migration/cpr.h" #include "monitor/monitor.h" /* @@ -637,6 +638,7 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) { ERRP_GUARD(); struct stat st; + bool ret = true; if (vbasedev->fd < 0) { if (stat(vbasedev->sysfsdev, &st) < 0) { @@ -653,16 +655,12 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp) error_setg(errp, "Use FD passing only with iommufd backend"); return false; } - /* - * Give a name with fd so any function printing out vbasedev->name - * will not break. 
- */ if (!vbasedev->name) { - vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); + ret = vfio_cpr_set_device_name(vbasedev, errp); } } - return true; + return ret; } void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 7c0cdd7..2de2811 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -24,6 +24,7 @@ #include "system/reset.h" #include "qemu/cutils.h" #include "qemu/chardev_open.h" +#include "migration/blocker.h" #include "pci.h" #include "exec/ram_addr.h" @@ -661,6 +662,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) iommufd_cdev_container_destroy(container); vfio_put_address_space(space); + migrate_del_blocker(&vbasedev->cpr.id_blocker); iommufd_cdev_unbind_and_disconnect(vbasedev); close(vbasedev->fd); } diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h index a9f2fbe..8a30d30 100644 --- a/include/hw/vfio/vfio-cpr.h +++ b/include/hw/vfio/vfio-cpr.h @@ -22,12 +22,14 @@ typedef struct VFIOContainerCPR { typedef struct VFIODeviceCPR { bool reused; Error *mdev_blocker; + Error *id_blocker; } VFIODeviceCPR; struct VFIOContainer; struct VFIOGroup; struct VFIOContainerBase; struct VFIOPCIDevice; +struct VFIODevice; int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e, Error **errp); @@ -53,4 +55,6 @@ void vfio_cpr_delete_vector_fd(struct VFIOPCIDevice *vdev, const char *name, int nr); extern const VMStateDescription vfio_cpr_pci_vmstate; + +bool vfio_cpr_set_device_name(struct VFIODevice *vbasedev, Error **errp); #endif -- 1.8.3.1