From: "Maciej S. Szmigiero" <maciej.szmigi...@oracle.com> The multifd received data needs to be reassembled since device state packets sent via different multifd channels can arrive out-of-order.
Therefore, each VFIO device state packet carries a header indicating its position in the stream. The raw device state data is saved into a VFIOStateBuffer for later in-order loading into the device. The last such VFIO device state packet should have VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state. Signed-off-by: Maciej S. Szmigiero <maciej.szmigi...@oracle.com> --- hw/vfio/migration.c | 116 ++++++++++++++++++++++++++++++++++ hw/vfio/pci.c | 2 + hw/vfio/trace-events | 1 + include/hw/vfio/vfio-common.h | 1 + 4 files changed, 120 insertions(+) diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index bcdf204d5cf4..0c0caec1bd64 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -301,6 +301,12 @@ typedef struct VFIOStateBuffer { } VFIOStateBuffer; typedef struct VFIOMultifd { + VFIOStateBuffers load_bufs; + QemuCond load_bufs_buffer_ready_cond; + QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ + uint32_t load_buf_idx; + uint32_t load_buf_idx_last; + uint32_t load_buf_queued_pending_buffers; } VFIOMultifd; static void vfio_state_buffer_clear(gpointer data) @@ -346,6 +352,103 @@ static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, guint idx) return &g_array_index(bufs->array, VFIOStateBuffer, idx); } +static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, + VFIODeviceStatePacket *packet, + size_t packet_total_size, + Error **errp) +{ + VFIOMigration *migration = vbasedev->migration; + VFIOMultifd *multifd = migration->multifd; + VFIOStateBuffer *lb; + + vfio_state_buffers_assert_init(&multifd->load_bufs); + if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { + vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1); + } + + lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx); + if (lb->is_present) { + error_setg(errp, "state buffer %" PRIu32 " already filled", + packet->idx); + return false; + } + + assert(packet->idx >= multifd->load_buf_idx); + + multifd->load_buf_queued_pending_buffers++; + if (multifd->load_buf_queued_pending_buffers > + vbasedev->migration_max_queued_buffers) { + error_setg(errp, + "queuing state buffer %" PRIu32 " would exceed the max of %" PRIu64, + packet->idx, vbasedev->migration_max_queued_buffers); + return false; + } + + lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); + lb->len = packet_total_size - sizeof(*packet); + lb->is_present = true; + + return true; +} + +static bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, + Error **errp) +{ + VFIODevice *vbasedev = opaque; + VFIOMigration *migration = vbasedev->migration; + VFIOMultifd *multifd = migration->multifd; + VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data; + + /* + * Holding BQL here would violate the lock order and can cause + * a deadlock once we attempt to lock load_bufs_mutex below. + */ + assert(!bql_locked()); + + if (!migration->multifd_transfer) { + error_setg(errp, + "got device state packet but not doing multifd transfer"); + return false; + } + + assert(multifd); + + if (data_size < sizeof(*packet)) { + error_setg(errp, "packet too short at %zu (min is %zu)", + data_size, sizeof(*packet)); + return false; + } + + if (packet->version != 0) { + error_setg(errp, "packet has unknown version %" PRIu32, + packet->version); + return false; + } + + if (packet->idx == UINT32_MAX) { + error_setg(errp, "packet has too high idx %" PRIu32, + packet->idx); + return false; + } + + trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx); + + QEMU_LOCK_GUARD(&multifd->load_bufs_mutex); + + /* config state packet should be the last one in the stream */ + if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) { + multifd->load_buf_idx_last = packet->idx; + } + + if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size, errp)) { + return false; + } + + qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); + + return true; +} + static int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp) { @@ -405,11 +508,23 @@ static VFIOMultifd *vfio_multifd_new(void) { VFIOMultifd *multifd = g_new(VFIOMultifd, 1); + vfio_state_buffers_init(&multifd->load_bufs); + + qemu_mutex_init(&multifd->load_bufs_mutex); + + multifd->load_buf_idx = 0; + multifd->load_buf_idx_last = UINT32_MAX; + multifd->load_buf_queued_pending_buffers = 0; + qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); + return multifd; } static void vfio_multifd_free(VFIOMultifd *multifd) { + qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond); + qemu_mutex_destroy(&multifd->load_bufs_mutex); + g_free(multifd); } @@ -940,6 +1055,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { .load_setup = vfio_load_setup, .load_cleanup = vfio_load_cleanup, .load_state = vfio_load_state, + .load_state_buffer = vfio_load_state_buffer, .switchover_ack_needed = vfio_switchover_ack_needed, }; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 83090c544d95..2700b355ecf1 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3380,6 +3380,8 @@ static const Property vfio_pci_dev_properties[] = { DEFINE_PROP_ON_OFF_AUTO("x-migration-load-config-after-iter", VFIOPCIDevice, vbasedev.migration_load_config_after_iter, ON_OFF_AUTO_AUTO), + DEFINE_PROP_UINT64("x-migration-max-queued-buffers", VFIOPCIDevice, + vbasedev.migration_max_queued_buffers, UINT64_MAX), DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice, vbasedev.migration_events, false), DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 1bebe9877d88..042a3dc54a33 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -153,6 +153,7 @@ vfio_load_device_config_state_start(const char *name) " (%s)" vfio_load_device_config_state_end(const char *name) " (%s)" vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d" +vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32 vfio_migration_realize(const char *name) " (%s)" vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s" vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s" diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index c0c9c0b1b263..0e8b0848882e 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -139,6 +139,7 @@ typedef struct VFIODevice { OnOffAuto enable_migration; OnOffAuto migration_multifd_transfer; OnOffAuto migration_load_config_after_iter; + uint64_t migration_max_queued_buffers; bool migration_events; VFIODeviceOps *ops; unsigned int num_irqs;