On 26.02.2025 17:43, Cédric Le Goater wrote:
On 2/19/25 21:34, Maciej S. Szmigiero wrote:
From: "Maciej S. Szmigiero" <maciej.szmigi...@oracle.com>
Implement the multifd device state transfer via additional per-device
thread inside save_live_complete_precopy_thread handler.
Switch between doing the data transfer in the new handler and doing it
in the old save_state handler depending on the
x-migration-multifd-transfer device property value.
Signed-off-by: Maciej S. Szmigiero <maciej.szmigi...@oracle.com>
---
hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++
hw/vfio/migration-multifd.h | 5 ++
hw/vfio/migration.c | 26 +++++--
hw/vfio/trace-events | 2 +
include/hw/vfio/vfio-common.h | 8 ++
5 files changed, 174 insertions(+), 6 deletions(-)
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 7200f6f1c2a2..0cfa9d31732a 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev,
Error **errp)
return true;
}
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
+{
+ assert(vfio_multifd_transfer_enabled(vbasedev));
+
+ /*
+ * Emit dummy NOP data on the main migration channel since the actual
+ * device state transfer is done via multifd channels.
+ */
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+}
+
+static bool
+vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
+ char *idstr,
+ uint32_t instance_id,
+ uint32_t idx,
+ Error **errp)
+{
+ g_autoptr(QIOChannelBuffer) bioc = NULL;
+ g_autoptr(QEMUFile) f = NULL;
+ int ret;
+ g_autofree VFIODeviceStatePacket *packet = NULL;
+ size_t packet_len;
+
+ bioc = qio_channel_buffer_new(0);
+ qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
+
+ f = qemu_file_new_output(QIO_CHANNEL(bioc));
+
+ if (vfio_save_device_config_state(f, vbasedev, errp)) {
+ return false;
+ }
+
+ ret = qemu_fflush(f);
+ if (ret) {
+ error_setg(errp, "save config state flush failed: %d", ret);
+ return false;
+ }
+
+ packet_len = sizeof(*packet) + bioc->usage;
+ packet = g_malloc0(packet_len);
+ packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
+ packet->idx = idx;
+ packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
+ memcpy(&packet->data, bioc->data, bioc->usage);
+
+ if (!multifd_queue_device_state(idstr, instance_id,
+ (char *)packet, packet_len)) {
+ error_setg(errp, "multifd config data queuing failed");
+ return false;
+ }
+
+ vfio_add_bytes_transferred(packet_len);
+
+ return true;
+}
+
+/*
+ * This thread is spawned by the migration core directly via
+ * .save_live_complete_precopy_thread SaveVMHandler.
+ *
+ * It exits after either:
+ * * completing saving the remaining device state and device config, OR:
+ * * encountering some error while doing the above, OR:
+ * * being forcefully aborted by the migration core by
+ * multifd_device_state_save_thread_should_exit() returning true.
+ */
+bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+ Error **errp)
In qemu_savevm_state_complete_precopy_iterable(), this handler is
called :
....
if (multifd_device_state) {
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
SaveLiveCompletePrecopyThreadHandler hdlr;
if (!se->ops || (in_postcopy && se->ops->has_postcopy &&
se->ops->has_postcopy(se->opaque)) ||
!se->ops->save_live_complete_precopy_thread) {
continue;
}
hdlr = se->ops->save_live_complete_precopy_thread;
multifd_spawn_device_state_save_thread(hdlr,
se->idstr, se->instance_id,
se->opaque);
}
}
I suggest naming it : vfio_multifd_save_complete_precopy_thread()
Renamed accordingly.
+{
+ VFIODevice *vbasedev = d->handler_opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ bool ret;
+ g_autofree VFIODeviceStatePacket *packet = NULL;
+ uint32_t idx;
+
+ if (!vfio_multifd_transfer_enabled(vbasedev)) {
+ /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
+ return true;
+ }
+
+ trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
+ d->idstr, d->instance_id);
+
+ /* We reach here with device state STOP or STOP_COPY only */
+ if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+ VFIO_DEVICE_STATE_STOP, errp)) {
+ ret = false;
These "ret = false" can be avoided if the variable is set at the
top of the function.
I inverted the "ret" logic here as in vfio_load_bufs_thread()
to make it false by default and set to true just before early
exit label.
+ goto ret_finish;
goto thread_exit ?
As I asked in one of the previous patches,
do this comment mean that your want to rename ret_finish label to
thread_exit?
+ }
+
+ packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
+ packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
+
+ for (idx = 0; ; idx++) {
+ ssize_t data_size;
+ size_t packet_size;
+
+ if (multifd_device_state_save_thread_should_exit()) {
+ error_setg(errp, "operation cancelled");
+ ret = false;
+ goto ret_finish;
+ }> +
+ data_size = read(migration->data_fd, &packet->data,
+ migration->data_buffer_size);
+ if (data_size < 0) {
+ error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
+ idx, errno);
+ ret = false;
+ goto ret_finish;
+ } else if (data_size == 0) {
+ break;
+ }
+
+ packet->idx = idx;
+ packet_size = sizeof(*packet) + data_size;
+
+ if (!multifd_queue_device_state(d->idstr, d->instance_id,
+ (char *)packet, packet_size)) {
+ error_setg(errp, "multifd data queuing failed");
+ ret = false;
+ goto ret_finish;
+ }
+
+ vfio_add_bytes_transferred(packet_size);
+ }
+
+ ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
+ d->idstr,
+ d->instance_id,
+ idx, errp);
+
+ret_finish:
+ trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
+
+ return ret;
+}
+
int vfio_multifd_switchover_start(VFIODevice *vbasedev)
{
VFIOMigration *migration = vbasedev->migration;
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
index 09cbb437d9d1..79780d7b5392 100644
--- a/hw/vfio/migration-multifd.h
+++ b/hw/vfio/migration-multifd.h
@@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error
**errp);
bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
Error **errp);
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
+
+bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+ Error **errp);
+
int vfio_multifd_switchover_start(VFIODevice *vbasedev);
#endif
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index b962309f7c27..69dcf2dac2fa 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
(..)
@@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice
*vbasedev,
return ret;
}
-static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
- Error **errp)
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
{
VFIODevice *vbasedev = opaque;
int ret;
@@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque,
Error **errp)
uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
int ret;
+ if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
+ return -EINVAL;
+ }
+
please move to another patch with the similar change of patch 25.
This patch is about the send/save side while patch 25
is called "*receive* init/cleanup".
So adding save setup to patch called "receive init" wouldn't be
consistent with that patch subject.
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
(..)
index ce2bdea8a2c2..ba851917f9fc 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
bool vfio_device_state_is_running(VFIODevice *vbasedev);
bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
+#ifdef CONFIG_LINUX
+int vfio_migration_set_state(VFIODevice *vbasedev,
+ enum vfio_device_mig_state new_state,
+ enum vfio_device_mig_state recover_state,
+ Error **errp);
please move below with the other declarations under #ifdef CONFIG_LINUX.
+#endif
+
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
int vfio_load_device_config_state(QEMUFile *f, void *opaque);
#ifdef CONFIG_LINUX
Done.
Thanks,
C.
Thanks,
Maciej