Replace the current VFIO migration protocol v1 implementation with a new implementation corresponding to VFIO migration protocol v2.
The main changes are: - VFIO device state is now represented as a finite state machine instead of a bitmap. - The migration interface with the kernel now uses the VFIO_DEVICE_FEATURE ioctl and normal read() and write() instead of the migration region. - As VFIO migration protocol v2 currently doesn't support the pre-copy phase of migration, the .save_live_pending and .save_live_iterate handlers and the pre-copy related code are removed. Detailed information about VFIO migration protocol v2 and its differences compared to v1 can be found here [1]. [1] https://lore.kernel.org/all/20220224142024.147653-10-yishaih@nvidia.com/ Signed-off-by: Avihai Horon <avih...@nvidia.com> --- hw/vfio/common.c | 21 +- hw/vfio/migration.c | 628 +++++++--------------------------- hw/vfio/trace-events | 9 +- include/hw/vfio/vfio-common.h | 8 +- 4 files changed, 153 insertions(+), 513 deletions(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 29982c7af8..4c6baa5a79 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -40,6 +40,7 @@ #include "trace.h" #include "qapi/error.h" #include "migration/migration.h" +#include "migration/misc.h" #include "sysemu/tpm.h" VFIOGroupList vfio_group_list = @@ -354,8 +355,9 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) return false; } - if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) - && (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) { + if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) && + (migration->device_state == VFIO_DEVICE_STATE_RUNNING || + migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P)) { return false; } } @@ -363,13 +365,16 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) return true; } -static bool vfio_devices_all_running_and_saving(VFIOContainer *container) +/* + * Check if all VFIO devices are running and migration is active, which is + * essentially equivalent to the migration being in pre-copy phase. 
+ */ +static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) { VFIOGroup *group; VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - if (!migration_is_setup_or_active(ms->state)) { + if (!migration_is_active(migrate_get_current())) { return false; } @@ -381,8 +386,8 @@ static bool vfio_devices_all_running_and_saving(VFIOContainer *container) return false; } - if ((migration->device_state & VFIO_DEVICE_STATE_V1_SAVING) && - (migration->device_state & VFIO_DEVICE_STATE_V1_RUNNING)) { + if (migration->device_state == VFIO_DEVICE_STATE_RUNNING || + migration->device_state == VFIO_DEVICE_STATE_RUNNING_P2P) { continue; } else { return false; @@ -461,7 +466,7 @@ static int vfio_dma_unmap(VFIOContainer *container, }; if (iotlb && container->dirty_pages_supported && - vfio_devices_all_running_and_saving(container)) { + vfio_devices_all_running_and_mig_active(container)) { return vfio_dma_unmap_bitmap(container, iova, size, iotlb); } diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index d4b6653026..8943ccbace 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -44,309 +44,96 @@ #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) +#define VFIO_MIG_DATA_BUFFER_SIZE (1024 * 1024) + static int64_t bytes_transferred; -static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, - off_t off, bool iswrite) +static const char *mig_state_to_str(enum vfio_device_mig_state state) { - int ret; - - ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : - pread(vbasedev->fd, val, count, off); - if (ret < count) { - error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" - HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count, - vbasedev->name, off, strerror(errno)); - return (ret < 0) ? 
ret : -EINVAL; + switch (state) { + case VFIO_DEVICE_STATE_ERROR: + return "ERROR"; + case VFIO_DEVICE_STATE_STOP: + return "STOP"; + case VFIO_DEVICE_STATE_RUNNING: + return "RUNNING"; + case VFIO_DEVICE_STATE_STOP_COPY: + return "STOP_COPY"; + case VFIO_DEVICE_STATE_RESUMING: + return "RESUMING"; + case VFIO_DEVICE_STATE_RUNNING_P2P: + return "RUNNING_P2P"; + default: + return "UNKNOWN STATE"; } - return 0; } -static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, - off_t off, bool iswrite) -{ - int ret, done = 0; - __u8 *tbuf = buf; - - while (count) { - int bytes = 0; - - if (count >= 8 && !(off % 8)) { - bytes = 8; - } else if (count >= 4 && !(off % 4)) { - bytes = 4; - } else if (count >= 2 && !(off % 2)) { - bytes = 2; - } else { - bytes = 1; - } - - ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); - if (ret) { - return ret; - } - - count -= bytes; - done += bytes; - off += bytes; - tbuf += bytes; - } - return done; -} - -#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) -#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) - -#define VFIO_MIG_STRUCT_OFFSET(f) \ - offsetof(struct vfio_device_migration_info, f) -/* - * Change the device_state register for device @vbasedev. Bits set in @mask - * are preserved, bits set in @value are set, and bits not set in either @mask - * or @value are cleared in device_state. If the register cannot be accessed, - * the resulting state would be invalid, or the device enters an error state, - * an error is returned. 
- */ - -static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, - uint32_t value) +static int vfio_migration_set_state(VFIODevice *vbasedev, + enum vfio_device_mig_state new_state, + enum vfio_device_mig_state recover_state) { VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - off_t dev_state_off = region->fd_offset + - VFIO_MIG_STRUCT_OFFSET(device_state); - uint32_t device_state; + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + + sizeof(struct vfio_device_feature_mig_state), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (void *)buf; + struct vfio_device_feature_mig_state *mig_state = (void *)feature->data; int ret; - ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), - dev_state_off); - if (ret < 0) { - return ret; - } - - device_state = (device_state & mask) | value; - - if (!VFIO_DEVICE_STATE_VALID(device_state)) { - return -EINVAL; - } - - ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), - dev_state_off); - if (ret < 0) { - int rret; - - rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), - dev_state_off); - - if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) { - hw_error("%s: Device in error state 0x%x", vbasedev->name, - device_state); - return rret ? 
rret : -EIO; + feature->argsz = sizeof(buf); + feature->flags = + VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE; + mig_state->device_state = new_state; + ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + if (ret) { + /* Try to put the device in some good state */ + mig_state->device_state = recover_state; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + hw_error("%s: Device in error state, can't recover", + vbasedev->name); } - return ret; - } - - migration->device_state = device_state; - trace_vfio_migration_set_state(vbasedev->name, device_state); - return 0; -} -static void *get_data_section_size(VFIORegion *region, uint64_t data_offset, - uint64_t data_size, uint64_t *size) -{ - void *ptr = NULL; - uint64_t limit = 0; - int i; + error_report("%s: Failed changing device state to %s", vbasedev->name, + mig_state_to_str(new_state)); + migration->device_state = recover_state; - if (!region->mmaps) { - if (size) { - *size = MIN(data_size, region->size - data_offset); - } - return ptr; + return -1; } - for (i = 0; i < region->nr_mmaps; i++) { - VFIOMmap *map = region->mmaps + i; - - if ((data_offset >= map->offset) && - (data_offset < map->offset + map->size)) { - - /* check if data_offset is within sparse mmap areas */ - ptr = map->mmap + data_offset - map->offset; - if (size) { - *size = MIN(data_size, map->offset + map->size - data_offset); - } - break; - } else if ((data_offset < map->offset) && - (!limit || limit > map->offset)) { + if (mig_state->data_fd != -1) { + if (migration->data_fd != -1) { /* - * data_offset is not within sparse mmap areas, find size of - * non-mapped area. Check through all list since region->mmaps list - * is not sorted. + * This can happen if the device is asynchronously reset and + * terminates a data transfer. */ - limit = map->offset; - } - } - - if (!ptr && size) { - *size = limit ? 
MIN(data_size, limit - data_offset) : data_size; - } - return ptr; -} - -static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) -{ - VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - uint64_t data_offset = 0, data_size = 0, sz; - int ret; - - ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); - if (ret < 0) { - return ret; - } - - ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); - if (ret < 0) { - return ret; - } + error_report("%s: data_fd out of sync", vbasedev->name); + close(mig_state->data_fd); - trace_vfio_save_buffer(vbasedev->name, data_offset, data_size, - migration->pending_bytes); - - qemu_put_be64(f, data_size); - sz = data_size; - - while (sz) { - void *buf; - uint64_t sec_size; - bool buf_allocated = false; - - buf = get_data_section_size(region, data_offset, sz, &sec_size); - - if (!buf) { - buf = g_try_malloc(sec_size); - if (!buf) { - error_report("%s: Error allocating buffer ", __func__); - return -ENOMEM; - } - buf_allocated = true; - - ret = vfio_mig_read(vbasedev, buf, sec_size, - region->fd_offset + data_offset); - if (ret < 0) { - g_free(buf); - return ret; - } + return -1; } - qemu_put_buffer(f, buf, sec_size); - - if (buf_allocated) { - g_free(buf); - } - sz -= sec_size; - data_offset += sec_size; + migration->data_fd = mig_state->data_fd; } + migration->device_state = new_state; - ret = qemu_file_get_error(f); + trace_vfio_migration_set_state(vbasedev->name, new_state); - if (!ret && size) { - *size = data_size; - } - - bytes_transferred += data_size; - return ret; + return 0; } static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t data_size) -{ - VFIORegion *region = &vbasedev->migration->region; - uint64_t data_offset = 0, size, report_size; - int ret; - - do { - ret = vfio_mig_read(vbasedev, &data_offset, 
sizeof(data_offset), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); - if (ret < 0) { - return ret; - } - - if (data_offset + data_size > region->size) { - /* - * If data_size is greater than the data section of migration region - * then iterate the write buffer operation. This case can occur if - * size of migration region at destination is smaller than size of - * migration region at source. - */ - report_size = size = region->size - data_offset; - data_size -= size; - } else { - report_size = size = data_size; - data_size = 0; - } - - trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); - - while (size) { - void *buf; - uint64_t sec_size; - bool buf_alloc = false; - - buf = get_data_section_size(region, data_offset, size, &sec_size); - - if (!buf) { - buf = g_try_malloc(sec_size); - if (!buf) { - error_report("%s: Error allocating buffer ", __func__); - return -ENOMEM; - } - buf_alloc = true; - } - - qemu_get_buffer(f, buf, sec_size); - - if (buf_alloc) { - ret = vfio_mig_write(vbasedev, buf, sec_size, - region->fd_offset + data_offset); - g_free(buf); - - if (ret < 0) { - return ret; - } - } - size -= sec_size; - data_offset += sec_size; - } - - ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); - if (ret < 0) { - return ret; - } - } while (data_size); - - return 0; -} - -static int vfio_update_pending(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; - VFIORegion *region = &migration->region; - uint64_t pending_bytes = 0; int ret; - ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), - region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); - if (ret < 0) { - migration->pending_bytes = 0; + ret = qemu_file_get_to_fd(f, migration->data_fd, data_size); + if (ret) { return ret; } - migration->pending_bytes = pending_bytes; - trace_vfio_update_pending(vbasedev->name, pending_bytes); + 
trace_vfio_load_state_device_data(vbasedev->name, data_size); + return 0; } @@ -398,9 +185,8 @@ static void vfio_migration_cleanup(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; - if (migration->region.mmaps) { - vfio_region_unmap(&migration->region); - } + close(migration->data_fd); + migration->data_fd = -1; } /* ---------------------------------------------------------------------- */ @@ -408,44 +194,13 @@ static void vfio_migration_cleanup(VFIODevice *vbasedev) static int vfio_save_setup(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - int ret; trace_vfio_save_setup(vbasedev->name); qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); - - if (migration->region.mmaps) { - /* - * Calling vfio_region_mmap() from migration thread. Memory API called - * from this function require locking the iothread when called from - * outside the main loop thread. - */ - qemu_mutex_lock_iothread(); - ret = vfio_region_mmap(&migration->region); - qemu_mutex_unlock_iothread(); - if (ret) { - error_report("%s: Failed to mmap VFIO migration region: %s", - vbasedev->name, strerror(-ret)); - error_report("%s: Falling back to slow path", vbasedev->name); - } - } - - ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, - VFIO_DEVICE_STATE_V1_SAVING); - if (ret) { - error_report("%s: Failed to set state SAVING", vbasedev->name); - return ret; - } - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - ret = qemu_file_get_error(f); - if (ret) { - return ret; - } - - return 0; + return qemu_file_get_error(f); } static void vfio_save_cleanup(void *opaque) @@ -456,127 +211,67 @@ static void vfio_save_cleanup(void *opaque) trace_vfio_save_cleanup(vbasedev->name); } -static void vfio_save_pending(QEMUFile *f, void *opaque, - uint64_t threshold_size, - uint64_t *res_precopy_only, - uint64_t *res_compatible, - uint64_t *res_postcopy_only) -{ - VFIODevice *vbasedev = opaque; - VFIOMigration *migration = 
vbasedev->migration; - int ret; - - ret = vfio_update_pending(vbasedev); - if (ret) { - return; - } - - *res_precopy_only += migration->pending_bytes; - - trace_vfio_save_pending(vbasedev->name, *res_precopy_only, - *res_postcopy_only, *res_compatible); -} - -static int vfio_save_iterate(QEMUFile *f, void *opaque) +/* Returns 1 if end-of-stream is reached, 0 if more data and -1 if error */ +static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) { - VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - uint64_t data_size; - int ret; - - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); - - if (migration->pending_bytes == 0) { - ret = vfio_update_pending(vbasedev); - if (ret) { - return ret; - } + ssize_t data_size; - if (migration->pending_bytes == 0) { - qemu_put_be64(f, 0); - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - /* indicates data finished, goto complete phase */ - return 1; - } + data_size = read(migration->data_fd, migration->data_buffer, + migration->data_buffer_size); + if (data_size < 0) { + return -1; } - - ret = vfio_save_buffer(f, vbasedev, &data_size); - if (ret) { - error_report("%s: vfio_save_buffer failed %s", vbasedev->name, - strerror(errno)); - return ret; + if (data_size == 0) { + return 1; } - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); + qemu_put_be64(f, data_size); + qemu_put_buffer_async(f, migration->data_buffer, data_size, false); + qemu_fflush(f); + bytes_transferred += data_size; - ret = qemu_file_get_error(f); - if (ret) { - return ret; - } + trace_vfio_save_block(migration->vbasedev->name, data_size); - /* - * Reset pending_bytes as .save_live_pending is not called during savevm or - * snapshot case, in such case vfio_update_pending() at the start of this - * function updates pending_bytes. 
- */ - migration->pending_bytes = 0; - trace_vfio_save_iterate(vbasedev->name, data_size); - return 0; + return qemu_file_get_error(f); } static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - uint64_t data_size; + enum vfio_device_mig_state recover_state; int ret; - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING, - VFIO_DEVICE_STATE_V1_SAVING); + /* We reach here with device state STOP or STOP_COPY only */ + recover_state = VFIO_DEVICE_STATE_STOP; + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, + recover_state); if (ret) { - error_report("%s: Failed to set state STOP and SAVING", - vbasedev->name); return ret; } - ret = vfio_update_pending(vbasedev); - if (ret) { - return ret; - } - - while (migration->pending_bytes > 0) { - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); - ret = vfio_save_buffer(f, vbasedev, &data_size); + do { + ret = vfio_save_block(f, vbasedev->migration); if (ret < 0) { - error_report("%s: Failed to save buffer", vbasedev->name); - return ret; - } - - if (data_size == 0) { - break; - } - - ret = vfio_update_pending(vbasedev); - if (ret) { return ret; } - } + } while (!ret); qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - ret = qemu_file_get_error(f); if (ret) { return ret; } - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, 0); + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP, + recover_state); if (ret) { - error_report("%s: Failed to set state STOPPED", vbasedev->name); return ret; } trace_vfio_save_complete_precopy(vbasedev->name); - return ret; + + return 0; } static void vfio_save_state(QEMUFile *f, void *opaque) @@ -595,28 +290,9 @@ static void vfio_save_state(QEMUFile *f, void *opaque) static int vfio_load_setup(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - int ret = 0; - if 
(migration->region.mmaps) { - ret = vfio_region_mmap(&migration->region); - if (ret) { - error_report("%s: Failed to mmap VFIO migration region %d: %s", - vbasedev->name, migration->region.nr, - strerror(-ret)); - error_report("%s: Falling back to slow path", vbasedev->name); - } - } - - ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, - VFIO_DEVICE_STATE_V1_RESUMING); - if (ret) { - error_report("%s: Failed to set state RESUMING", vbasedev->name); - if (migration->region.mmaps) { - vfio_region_unmap(&migration->region); - } - } - return ret; + return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING, + vbasedev->migration->device_state); } static int vfio_load_cleanup(void *opaque) @@ -685,8 +361,6 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) static SaveVMHandlers savevm_vfio_handlers = { .save_setup = vfio_save_setup, .save_cleanup = vfio_save_cleanup, - .save_live_pending = vfio_save_pending, - .save_live_iterate = vfio_save_iterate, .save_live_complete_precopy = vfio_save_complete_precopy, .save_state = vfio_save_state, .load_setup = vfio_load_setup, @@ -699,58 +373,28 @@ static SaveVMHandlers savevm_vfio_handlers = { static void vfio_vmstate_change(void *opaque, bool running, RunState state) { VFIODevice *vbasedev = opaque; - VFIOMigration *migration = vbasedev->migration; - uint32_t value, mask; + enum vfio_device_mig_state new_state; int ret; - if (vbasedev->migration->vm_running == running) { - return; - } - if (running) { - /* - * Here device state can have one of _SAVING, _RESUMING or _STOP bit. - * Transition from _SAVING to _RUNNING can happen if there is migration - * failure, in that case clear _SAVING bit. - * Transition from _RESUMING to _RUNNING occurs during resuming - * phase, in that case clear _RESUMING bit. - * In both the above cases, set _RUNNING bit. 
- */ - mask = ~VFIO_DEVICE_STATE_MASK; - value = VFIO_DEVICE_STATE_V1_RUNNING; + new_state = VFIO_DEVICE_STATE_RUNNING; } else { - /* - * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset - * _RUNNING bit - */ - mask = ~VFIO_DEVICE_STATE_V1_RUNNING; - - /* - * When VM state transition to stop for savevm command, device should - * start saving data. - */ - if (state == RUN_STATE_SAVE_VM) { - value = VFIO_DEVICE_STATE_V1_SAVING; - } else { - value = 0; - } + new_state = VFIO_DEVICE_STATE_STOP; } - ret = vfio_migration_set_state(vbasedev, mask, value); + ret = + vfio_migration_set_state(vbasedev, new_state, VFIO_DEVICE_STATE_ERROR); if (ret) { /* * Migration should be aborted in this case, but vm_state_notify() * currently does not support reporting failures. */ - error_report("%s: Failed to set device state 0x%x", vbasedev->name, - (migration->device_state & mask) | value); if (migrate_get_current()->to_dst_file) { qemu_file_set_error(migrate_get_current()->to_dst_file, ret); } } - vbasedev->migration->vm_running = running; trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), - (migration->device_state & mask) | value); + new_state); } static void vfio_migration_state_notifier(Notifier *notifier, void *data) @@ -759,7 +403,6 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) VFIOMigration *migration = container_of(notifier, VFIOMigration, migration_state); VFIODevice *vbasedev = migration->vbasedev; - int ret; trace_vfio_migration_state_notifier(vbasedev->name, MigrationStatus_str(s->state)); @@ -769,34 +412,45 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) case MIGRATION_STATUS_CANCELLED: case MIGRATION_STATUS_FAILED: bytes_transferred = 0; - ret = vfio_migration_set_state( - vbasedev, - ~(VFIO_DEVICE_STATE_V1_SAVING | VFIO_DEVICE_STATE_V1_RESUMING), - VFIO_DEVICE_STATE_V1_RUNNING); - if (ret) { - error_report("%s: Failed to set state RUNNING", vbasedev->name); - } + 
vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING, + VFIO_DEVICE_STATE_ERROR); } } static void vfio_migration_exit(VFIODevice *vbasedev) { - VFIOMigration *migration = vbasedev->migration; - - vfio_region_exit(&migration->region); - vfio_region_finalize(&migration->region); + g_free(vbasedev->migration->data_buffer); g_free(vbasedev->migration); vbasedev->migration = NULL; } -static int vfio_migration_init(VFIODevice *vbasedev, - struct vfio_region_info *info) +static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags) +{ + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + + sizeof(struct vfio_device_feature_migration), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (void *)buf; + struct vfio_device_feature_migration *mig = (void *)feature->data; + + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + return -EOPNOTSUPP; + } + + *mig_flags = mig->flags; + + return 0; +} + +static int vfio_migration_init(VFIODevice *vbasedev) { - int ret; Object *obj; VFIOMigration *migration; char id[256] = ""; g_autofree char *path = NULL, *oid = NULL; + uint64_t mig_flags; + int ret; if (!vbasedev->ops->vfio_get_object) { return -EINVAL; @@ -807,25 +461,23 @@ static int vfio_migration_init(VFIODevice *vbasedev, return -EINVAL; } - vbasedev->migration = g_new0(VFIOMigration, 1); - - ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, - info->index, "migration"); + ret = vfio_migration_query_flags(vbasedev, &mig_flags); if (ret) { - error_report("%s: Failed to setup VFIO migration region %d: %s", - vbasedev->name, info->index, strerror(-ret)); - goto err; + return ret; } - if (!vbasedev->migration->region.size) { - error_report("%s: Invalid zero-sized VFIO migration region %d", - vbasedev->name, info->index); - ret = -EINVAL; - goto err; + /* Basic migration functionality must be 
supported */ + if (!(mig_flags & VFIO_MIGRATION_STOP_COPY)) { + return -EOPNOTSUPP; } + vbasedev->migration = g_new0(VFIOMigration, 1); + vbasedev->migration->data_buffer_size = VFIO_MIG_DATA_BUFFER_SIZE; + vbasedev->migration->data_buffer = + g_malloc0(vbasedev->migration->data_buffer_size); migration = vbasedev->migration; migration->vbasedev = vbasedev; + migration->data_fd = -1; oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj))); if (oid) { @@ -837,17 +489,13 @@ static int vfio_migration_init(VFIODevice *vbasedev, register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, vbasedev); - migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev, vfio_vmstate_change, vbasedev); migration->migration_state.notify = vfio_migration_state_notifier; add_migration_state_change_notifier(&migration->migration_state); - return 0; -err: - vfio_migration_exit(vbasedev); - return ret; + return 0; } /* ---------------------------------------------------------------------- */ @@ -860,7 +508,6 @@ int64_t vfio_mig_bytes_transferred(void) int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) { VFIOContainer *container = vbasedev->group->container; - struct vfio_region_info *info = NULL; int ret = -ENOTSUP; if (!vbasedev->enable_migration) { @@ -874,27 +521,18 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) migrate_get_current()->skip_precopy = true; } - ret = vfio_get_dev_region_info(vbasedev, - VFIO_REGION_TYPE_MIGRATION_DEPRECATED, - VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED, - &info); + ret = vfio_migration_init(vbasedev); if (ret) { goto add_blocker; } - ret = vfio_migration_init(vbasedev, info); - if (ret) { - goto add_blocker; - } + trace_vfio_migration_probe(vbasedev->name); - trace_vfio_migration_probe(vbasedev->name, info->index); - g_free(info); return 0; add_blocker: error_setg(&vbasedev->migration_blocker, "VFIO device doesn't support migration"); - g_free(info); ret = migrate_add_blocker(vbasedev->migration_blocker, errp); 
if (ret < 0) { diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 582882db91..a24ea7d8b0 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -148,21 +148,18 @@ vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u" vfio_display_edid_write_error(void) "" # migration.c -vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" +vfio_migration_probe(const char *name) " (%s)" vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" vfio_save_setup(const char *name) " (%s)" vfio_save_cleanup(const char *name) " (%s)" -vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64 -vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64 vfio_save_device_config_state(const char *name) " (%s)" -vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64 -vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" vfio_save_complete_precopy(const char *name) " (%s)" vfio_load_device_config_state(const char *name) " (%s)" vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 -vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 +vfio_load_state_device_data(const char *name, uint64_t data_size) " (%s) size 0x%"PRIx64 vfio_load_cleanup(const char *name) " (%s)" vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 
vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 +vfio_save_block(const char *name, int data_size) " (%s) data_size %d" diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index e573f5a9f1..09446a9082 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -61,11 +61,11 @@ typedef struct VFIORegion { typedef struct VFIOMigration { struct VFIODevice *vbasedev; VMChangeStateEntry *vm_state; - VFIORegion region; - uint32_t device_state; - int vm_running; + enum vfio_device_mig_state device_state; + int data_fd; Notifier migration_state; - uint64_t pending_bytes; + void *data_buffer; + size_t data_buffer_size; } VFIOMigration; typedef struct VFIOAddressSpace { -- 2.21.3