VFIO migration is not compatible with postcopy migration. A VFIO device on the destination can't handle page faults for pages that have not been sent yet.
Attempting such a migration will cause the VM to crash on the destination:

  qemu-system-x86_64: VFIO_MAP_DMA failed: Bad address
  qemu-system-x86_64: vfio_dma_map(0x55a28c7659d0, 0xc0000, 0xb000, 0x7f1b11a00000) = -14 (Bad address)
  qemu: hardware error: vfio: DMA mapping failed, unable to continue

To prevent this and to be explicit about supported features, block VFIO migration with postcopy migration: Fail setting postcopy capability if a VFIO device is present, and add a migration blocker if a VFIO device is added when postcopy capability is on.

Reported-by: Yanghang Liu <yangh...@redhat.com>
Signed-off-by: Avihai Horon <avih...@nvidia.com>
--- include/hw/vfio/vfio-common.h | 2 ++ migration/migration.h | 2 ++ hw/vfio/common.c | 43 +++++++++++++++++++++++++++++++++++ hw/vfio/migration.c | 6 +++++ migration/options.c | 19 ++++++++++++++++ migration/target.c | 19 ++++++++++++++++ 6 files changed, 91 insertions(+) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index e9b8954595..c0b58f2bb7 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -227,6 +227,8 @@ extern VFIOGroupList vfio_group_list; bool vfio_mig_active(void); int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); void vfio_unblock_multiple_devices_migration(void); +int vfio_block_postcopy_migration(VFIODevice *vbasedev, Error **errp); +void vfio_unblock_postcopy_migration(void); bool vfio_viommu_preset(VFIODevice *vbasedev); int64_t vfio_mig_bytes_transferred(void); void vfio_reset_bytes_transferred(void); diff --git a/migration/migration.h b/migration/migration.h index c5695de214..21a6423408 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -514,6 +514,8 @@ void migration_cancel(const Error *error); void migration_populate_vfio_info(MigrationInfo *info); void migration_reset_vfio_bytes_transferred(void); +bool migration_vfio_mig_active(void); +void migration_vfio_unblock_postcopy_migration(void); void 
postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); #endif diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 373f6e5932..7461194b2b 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -40,6 +40,7 @@ #include "trace.h" #include "qapi/error.h" #include "migration/migration.h" +#include "migration/options.h" #include "migration/misc.h" #include "migration/blocker.h" #include "migration/qemu-file.h" @@ -343,6 +344,7 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, uint64_t size, ram_addr_t ram_addr); static Error *multiple_devices_migration_blocker; +static Error *postcopy_migration_blocker; static unsigned int vfio_migratable_devices_num(void) { @@ -427,6 +429,47 @@ void vfio_unblock_multiple_devices_migration(void) multiple_devices_migration_blocker = NULL; } +int vfio_block_postcopy_migration(VFIODevice *vbasedev, Error **errp) +{ + int ret; + + if (!migrate_postcopy_ram()) { + return 0; + } + + if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { + error_setg(errp, + "VFIO migration is not compatible with postcopy migration"); + return -EINVAL; + } + + if (postcopy_migration_blocker) { + return 0; + } + + error_setg(&postcopy_migration_blocker, + "VFIO migration is not compatible with postcopy migration"); + ret = migrate_add_blocker(postcopy_migration_blocker, errp); + if (ret < 0) { + error_free(postcopy_migration_blocker); + postcopy_migration_blocker = NULL; + } + + return ret; +} + +void vfio_unblock_postcopy_migration(void) +{ + if (!postcopy_migration_blocker || + (vfio_migratable_devices_num() && migrate_postcopy_ram())) { + return; + } + + migrate_del_blocker(postcopy_migration_blocker); + error_free(postcopy_migration_blocker); + postcopy_migration_blocker = NULL; +} + bool vfio_mig_active(void) { return vfio_migratable_devices_num(); diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 71855468fe..76406e9ae9 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -856,6 +856,7 @@ static void 
vfio_migration_deinit(VFIODevice *vbasedev) unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); vfio_migration_free(vbasedev); vfio_unblock_multiple_devices_migration(); + vfio_unblock_postcopy_migration(); } static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) @@ -939,6 +940,11 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) goto out_deinit; } + ret = vfio_block_postcopy_migration(vbasedev, errp); + if (ret) { + goto out_deinit; + } + if (vfio_viommu_preset(vbasedev)) { error_setg(&err, "%s: Migration is currently not supported " "with vIOMMU enabled", vbasedev->name); diff --git a/migration/options.c b/migration/options.c index 1d1e1321b0..e201053563 100644 --- a/migration/options.c +++ b/migration/options.c @@ -499,6 +499,11 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) error_setg(errp, "Postcopy is not yet compatible with multifd"); return false; } + + if (migration_vfio_mig_active()) { + error_setg(errp, "Postcopy is not compatible with VFIO migration"); + return false; + } } if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { @@ -612,6 +617,16 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) return true; } +/* + * Devices might have added migration blockers based on migration capabilities + * values when those devices were added. Remove such blockers according to new + * changes in migration capabilities. 
+ */ +static void migration_caps_remove_blockers(void) +{ + migration_vfio_unblock_postcopy_migration(); +} + bool migrate_cap_set(int cap, bool value, Error **errp) { MigrationState *s = migrate_get_current(); @@ -629,6 +644,8 @@ bool migrate_cap_set(int cap, bool value, Error **errp) return false; } s->capabilities[cap] = value; + migration_caps_remove_blockers(); + return true; } @@ -678,6 +695,8 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, for (cap = params; cap; cap = cap->next) { s->capabilities[cap->value->capability] = cap->value->state; } + + migration_caps_remove_blockers(); } /* parameters */ diff --git a/migration/target.c b/migration/target.c index a6ffa9a5ce..690ecb4dd5 100644 --- a/migration/target.c +++ b/migration/target.c @@ -27,6 +27,16 @@ void migration_reset_vfio_bytes_transferred(void) { vfio_reset_bytes_transferred(); } + +bool migration_vfio_mig_active(void) +{ + return vfio_mig_active(); +} + +void migration_vfio_unblock_postcopy_migration(void) +{ + vfio_unblock_postcopy_migration(); +} #else void migration_populate_vfio_info(MigrationInfo *info) { @@ -35,4 +45,13 @@ void migration_populate_vfio_info(MigrationInfo *info) void migration_reset_vfio_bytes_transferred(void) { } + +bool migration_vfio_mig_active(void) +{ + return false; +} + +void migration_vfio_unblock_postcopy_migration() +{ +} #endif -- 2.26.3