On 12.02.2025 18:03, Cédric Le Goater wrote:
On 1/30/25 11:08, Maciej S. Szmigiero wrote:
From: "Maciej S. Szmigiero" <maciej.szmigi...@oracle.com>

Implement the multifd device state transfer via additional per-device
thread inside save_live_complete_precopy_thread handler.

Switch between doing the data transfer in the new handler and doing it
in the old save_state handler depending on the
x-migration-multifd-transfer device property value.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigi...@oracle.com>
---
  hw/vfio/migration.c  | 159 +++++++++++++++++++++++++++++++++++++++++++
  hw/vfio/trace-events |   2 +
  2 files changed, 161 insertions(+)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 31f651ffee85..37d1c0f3d32f 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -943,6 +943,24 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, 
Error **errp)
      uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
      int ret;
+    /*
+     * Make a copy of this setting at the start in case it is changed
+     * mid-migration.
+     */
+    if (vbasedev->migration_multifd_transfer == ON_OFF_AUTO_AUTO) {
+        migration->multifd_transfer = vfio_multifd_transfer_supported();
+    } else {
+        migration->multifd_transfer =
+            vbasedev->migration_multifd_transfer == ON_OFF_AUTO_ON;
+    }
+
+    if (migration->multifd_transfer && !vfio_multifd_transfer_supported()) {
+        error_setg(errp,
+                   "%s: Multifd device transfer requested but unsupported in the 
current config",
+                   vbasedev->name);
+        return -EINVAL;
+    }

Please implement a common routine vfio_multifd_is_enabled() that can be
shared with vfio_load_setup().

Done/almost done (details are being worked out in conversation about other 
patch).

+
      qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
      vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
@@ -1114,13 +1132,32 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
      return !migration->precopy_init_size && !migration->precopy_dirty_size;
  }
+static void vfio_save_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)

I would prefer naming it vfio_multifd_emit_dummy_eos().

Done.

+{
+    VFIOMigration *migration = vbasedev->migration;
+
+    assert(migration->multifd_transfer);
+
+    /*
+     * Emit dummy NOP data on the main migration channel since the actual
+     * device state transfer is done via multifd channels.
+     */
+    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+}
+
  static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
  {
      VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
      ssize_t data_size;
      int ret;
      Error *local_err = NULL;
+    if (migration->multifd_transfer) {
+        vfio_save_multifd_emit_dummy_eos(vbasedev, f);
+        return 0;
+    }
+
      trace_vfio_save_complete_precopy_start(vbasedev->name);
      /* We reach here with device state STOP or STOP_COPY only */
@@ -1146,12 +1183,133 @@ static int vfio_save_complete_precopy(QEMUFile *f, 
void *opaque)
      return ret;
  }
+static int
+vfio_save_complete_precopy_async_thread_config_state(VFIODevice *vbasedev,
+                                                     char *idstr,
+                                                     uint32_t instance_id,
+                                                     uint32_t idx)

why use 'async_thread' in the name ?

vfio_save_complete_precopy_config_state() should be enough to refer
to its caller vfio_save_complete_precopy_thread().

That "async" part is truly unnecessary since it's a leftover from
patch set v1 days when the thread entry point was called
"vfio_save_complete_precopy_async_thread".

But I will keep the "thread" part since naming it just
"vfio_save_complete_precopy_config_state()" would suggest it's
called from normal precopy handler (vfio_save_complete_precopy()).

Please add
an 'Error **' argument too.


Good idea, done now.

+{
+    g_autoptr(QIOChannelBuffer) bioc = NULL;
+    g_autoptr(QEMUFile) f = NULL;
+    int ret;
+    g_autofree VFIODeviceStatePacket *packet = NULL;
+    size_t packet_len;
+
+    bioc = qio_channel_buffer_new(0);
+    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
+
+    f = qemu_file_new_output(QIO_CHANNEL(bioc));
+
+    ret = vfio_save_device_config_state(f, vbasedev, NULL);

I would prefer that we catch the error and propagate it to the caller.

Sure.

+    if (ret) {
+        return ret;
+    }
+
+    ret = qemu_fflush(f);
+    if (ret) {
+        return ret;
+    }
+
+    packet_len = sizeof(*packet) + bioc->usage;
+    packet = g_malloc0(packet_len);
+    packet->idx = idx;
+    packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
+    memcpy(&packet->data, bioc->data, bioc->usage);
+
+    if (!multifd_queue_device_state(idstr, instance_id,
+                                    (char *)packet, packet_len)) {
+        return -1;
+    }
+
+    qatomic_add(&bytes_transferred, packet_len);
+
+    return 0;
+}
+
+static int vfio_save_complete_precopy_thread(char *idstr,
+                                             uint32_t instance_id,
+                                             bool *abort_flag,
+                                             void *opaque)

This lacks an "Error **" argument. I am not sure what was decided
in patch 19 "migration: Add save_live_complete_precopy_thread
handler".

We should do our best to collect and propagate errors and avoid
error_report() calls. With VFIO involved, the reasons why errors
can occur are increasingly numerous, as hardware is exposed and
host drivers are involved.

I understand this is a complex request for code when this code
relies on a framework using callbacks, even more with threads.

It now has an Error argument from the changes resulting from
discussions with Peter:
https://gitlab.com/maciejsszmigiero/qemu/-/commit/0e23b66291b95c10ec1f0d82830320cae9e06ce4

+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    int ret;
+    g_autofree VFIODeviceStatePacket *packet = NULL;
+    uint32_t idx;
+
+    if (!migration->multifd_transfer) {
+        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */

why would vfio_save_complete_precopy_thread be called then ?

The migration core launches these threads if it supports device
state transfer.

But the driver (in this case) VFIO might have such transfer
unsupported for its own reasons - like because user disabled
switchover start message or just explicitly disabled this transfer.

Or maybe even the device does not like this transfer for some
reason (possible in the ARM case without config state interlock).

We discussed this detail during v3 with Avihai and Peter and
decided to do this this way (launching this thread unconditionally)
rather than export additional SaveVMHandler through which
the device could tell the migration core whether it wants such thread:
https://lore.kernel.org/qemu-devel/Z2BkbkF6P-2MHNN2@x1n/

Looks
like an error to me, may be not fatal but an error report would be
good to have. no ?

+        return 0;
+    }
+
+    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
+                                                  idstr, instance_id);
+
+    /* We reach here with device state STOP or STOP_COPY only */
+    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+                                   VFIO_DEVICE_STATE_STOP, NULL);

Error missing.

Already taken care of when adding Error parameter to the thread entry
point function, but good point anyway.

+    if (ret) {
+        goto ret_finish;
+    }
+
+    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
+
+    for (idx = 0; ; idx++) {
+        ssize_t data_size;
+        size_t packet_size;
+
+        if (qatomic_read(abort_flag)) {
+            ret = -ECANCELED;
+            goto ret_finish;
+        }
+
+        data_size = read(migration->data_fd, &packet->data,
+                         migration->data_buffer_size);
+        if (data_size < 0) {
+            ret = -errno;
+            goto ret_finish;
+        } else if (data_size == 0) {
+            break;
+        }
+
+        packet->idx = idx;
+        packet_size = sizeof(*packet) + data_size;
+
+        if (!multifd_queue_device_state(idstr, instance_id,
+                                        (char *)packet, packet_size)) {
+            ret = -1;
+            goto ret_finish;
+        }
+
+        qatomic_add(&bytes_transferred, packet_size);
+    }
+
+    ret = vfio_save_complete_precopy_async_thread_config_state(vbasedev, idstr,
+                                                               instance_id,
+                                                               idx);
+
+ret_finish:
+    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
+
+    return ret;
+}
+
  static void vfio_save_state(QEMUFile *f, void *opaque)
  {
      VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
      Error *local_err = NULL;
      int ret;
+    if (migration->multifd_transfer) {
+        if (vfio_load_config_after_iter(vbasedev)) {
+            qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY);


Please put the above chunck at the end of the series with the patch
adding ARM support.

Done.

+        } else {
+            vfio_save_multifd_emit_dummy_eos(vbasedev, f);
+        }

Please introduce a vfio_multifd_save_state() routine and a
vfio_"normal"_save_state() routine and change vfio_save_state()
to call one or the other.


So what should be the name of this "normal" save state routine
then, since you put "normal" in quotes?
vfio_nonmultifd_save_state()?

Thanks,

C.

Thanks,
Maciej


Reply via email to