VFIO migration is not compatible with postcopy migration. A VFIO device
in the destination can't handle page faults for pages that have not been
sent yet.

Attempting such a migration will cause the VM to crash in the destination:

qemu-system-x86_64: VFIO_MAP_DMA failed: Bad address
qemu-system-x86_64: vfio_dma_map(0x55a28c7659d0, 0xc0000, 0xb000, 0x7f1b11a00000) = -14 (Bad address)
qemu: hardware error: vfio: DMA mapping failed, unable to continue

To prevent this and to be explicit about supported features, block VFIO
migration with postcopy migration: Fail setting postcopy capability if a
VFIO device is present, and add a migration blocker if a VFIO device is
added when postcopy capability is on.

Reported-by: Yanghang Liu <yangh...@redhat.com>
Signed-off-by: Avihai Horon <avih...@nvidia.com>
---
 include/hw/vfio/vfio-common.h |  2 ++
 migration/migration.h         |  2 ++
 hw/vfio/common.c              | 43 +++++++++++++++++++++++++++++++++++
 hw/vfio/migration.c           |  6 +++++
 migration/options.c           | 19 ++++++++++++++++
 migration/target.c            | 19 ++++++++++++++++
 6 files changed, 91 insertions(+)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index e9b8954595..c0b58f2bb7 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -227,6 +227,8 @@ extern VFIOGroupList vfio_group_list;
 bool vfio_mig_active(void);
 int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp);
 void vfio_unblock_multiple_devices_migration(void);
+int vfio_block_postcopy_migration(VFIODevice *vbasedev, Error **errp);
+void vfio_unblock_postcopy_migration(void);
 bool vfio_viommu_preset(VFIODevice *vbasedev);
 int64_t vfio_mig_bytes_transferred(void);
 void vfio_reset_bytes_transferred(void);
diff --git a/migration/migration.h b/migration/migration.h
index c5695de214..21a6423408 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -514,6 +514,8 @@ void migration_cancel(const Error *error);
 
 void migration_populate_vfio_info(MigrationInfo *info);
 void migration_reset_vfio_bytes_transferred(void);
+bool migration_vfio_mig_active(void);
+void migration_vfio_unblock_postcopy_migration(void);
 void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
 
 #endif
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 373f6e5932..7461194b2b 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -40,6 +40,7 @@
 #include "trace.h"
 #include "qapi/error.h"
 #include "migration/migration.h"
+#include "migration/options.h"
 #include "migration/misc.h"
 #include "migration/blocker.h"
 #include "migration/qemu-file.h"
@@ -343,6 +344,7 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
                                  uint64_t size, ram_addr_t ram_addr);
 
 static Error *multiple_devices_migration_blocker;
+static Error *postcopy_migration_blocker;
 
 static unsigned int vfio_migratable_devices_num(void)
 {
@@ -427,6 +429,47 @@ void vfio_unblock_multiple_devices_migration(void)
     multiple_devices_migration_blocker = NULL;
 }
 
+int vfio_block_postcopy_migration(VFIODevice *vbasedev, Error **errp)
+{
+    int ret;
+
+    if (!migrate_postcopy_ram()) {
+        return 0;
+    }
+
+    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
+        error_setg(errp,
+                   "VFIO migration is not compatible with postcopy migration");
+        return -EINVAL;
+    }
+
+    if (postcopy_migration_blocker) {
+        return 0;
+    }
+
+    error_setg(&postcopy_migration_blocker,
+               "VFIO migration is not compatible with postcopy migration");
+    ret = migrate_add_blocker(postcopy_migration_blocker, errp);
+    if (ret < 0) {
+        error_free(postcopy_migration_blocker);
+        postcopy_migration_blocker = NULL;
+    }
+
+    return ret;
+}
+
+void vfio_unblock_postcopy_migration(void)
+{
+    if (!postcopy_migration_blocker ||
+        (vfio_migratable_devices_num() && migrate_postcopy_ram())) {
+        return;
+    }
+
+    migrate_del_blocker(postcopy_migration_blocker);
+    error_free(postcopy_migration_blocker);
+    postcopy_migration_blocker = NULL;
+}
+
 bool vfio_mig_active(void)
 {
     return vfio_migratable_devices_num();
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 71855468fe..76406e9ae9 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -856,6 +856,7 @@ static void vfio_migration_deinit(VFIODevice *vbasedev)
     unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
     vfio_migration_free(vbasedev);
     vfio_unblock_multiple_devices_migration();
+    vfio_unblock_postcopy_migration();
 }
 
 static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
@@ -939,6 +940,11 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
         goto out_deinit;
     }
 
+    ret = vfio_block_postcopy_migration(vbasedev, errp);
+    if (ret) {
+        goto out_deinit;
+    }
+
     if (vfio_viommu_preset(vbasedev)) {
         error_setg(&err, "%s: Migration is currently not supported "
                    "with vIOMMU enabled", vbasedev->name);
diff --git a/migration/options.c b/migration/options.c
index 1d1e1321b0..e201053563 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -499,6 +499,11 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
             error_setg(errp, "Postcopy is not yet compatible with multifd");
             return false;
         }
+
+        if (migration_vfio_mig_active()) {
+            error_setg(errp, "Postcopy is not compatible with VFIO migration");
+            return false;
+        }
     }
 
     if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
@@ -612,6 +617,16 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
     return true;
 }
 
+/*
+ * Devices might have added migration blockers based on migration capabilities
+ * values when those devices were added. Remove such blockers according to new
+ * changes in migration capabilities.
+ */
+static void migration_caps_remove_blockers(void)
+{
+    migration_vfio_unblock_postcopy_migration();
+}
+
 bool migrate_cap_set(int cap, bool value, Error **errp)
 {
     MigrationState *s = migrate_get_current();
@@ -629,6 +644,8 @@ bool migrate_cap_set(int cap, bool value, Error **errp)
         return false;
     }
     s->capabilities[cap] = value;
+    migration_caps_remove_blockers();
+
     return true;
 }
 
@@ -678,6 +695,8 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
     for (cap = params; cap; cap = cap->next) {
         s->capabilities[cap->value->capability] = cap->value->state;
     }
+
+    migration_caps_remove_blockers();
 }
 
 /* parameters */
diff --git a/migration/target.c b/migration/target.c
index a6ffa9a5ce..690ecb4dd5 100644
--- a/migration/target.c
+++ b/migration/target.c
@@ -27,6 +27,16 @@ void migration_reset_vfio_bytes_transferred(void)
 {
     vfio_reset_bytes_transferred();
 }
+
+bool migration_vfio_mig_active(void)
+{
+    return vfio_mig_active();
+}
+
+void migration_vfio_unblock_postcopy_migration(void)
+{
+    vfio_unblock_postcopy_migration();
+}
 #else
 void migration_populate_vfio_info(MigrationInfo *info)
 {
@@ -35,4 +45,13 @@ void migration_populate_vfio_info(MigrationInfo *info)
 void migration_reset_vfio_bytes_transferred(void)
 {
 }
+
+bool migration_vfio_mig_active(void)
+{
+    return false;
+}
+
+void migration_vfio_unblock_postcopy_migration(void)
+{
+}
 #endif
-- 
2.26.3


Reply via email to