Hello Peter,

On Wed, 5 Mar 2025 at 18:24, Peter Xu <pet...@redhat.com> wrote:
> > On Tue, 4 Mar 2025 at 20:05, Peter Xu <pet...@redhat.com> wrote:
> > > I think we need the header; the ram is a module.
> > > Do something similar to qemu_savevm_state_complete_precopy_iterable() but do
> > > whatever a vmstate handler wants, so it'll be with a header.
> >
====
diff --git a/migration/migration.c b/migration/migration.c
index 65fc4f5eed..da2c49c303 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3401,9 +3401,11 @@ static MigIterateState migration_iteration_run(MigrationState *s)
     if (!in_postcopy && must_precopy <= s->threshold_size
         && can_switchover && qatomic_read(&s->start_postcopy)) {
         if (migrate_multifd()) {
-            multifd_send_flush();
-            multifd_send_sync_main(MULTIFD_SYNC_LOCAL);
-            qemu_savevm_send_multifd_recv_sync(s->to_dst_file);
+/*          multifd_send_flush();
+ *          multifd_send_sync_main(MULTIFD_SYNC_ALL);
+ *          qemu_savevm_send_multifd_recv_sync(s->to_dst_file);
+ */
+            qemu_savevm_state_complete_multifd(s->to_dst_file);
             multifd_send_shutdown();
         }
         if (postcopy_start(s, &local_err)) {
diff --git a/migration/savevm.c b/migration/savevm.c
index 0b71e988ba..c2b181b0cc 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1602,6 +1602,37 @@ int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
     return qemu_fflush(f);
 }

+int qemu_savevm_state_complete_multifd(QEMUFile *f)
+{
+    int ret = 0;
+    SaveStateEntry *se;
+    int64_t start_ts_each, end_ts_each;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (strcmp(se->idstr, "ram")) {
+            continue;
+        }
+
+        start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_savevm_section_start(se->idstr, se->section_id);
+
+        save_section_header(f, se, QEMU_VM_SECTION_END);
+
+        ret = se->ops->save_live_complete_precopy(f, se->opaque);
+        trace_savevm_section_end(se->idstr, se->section_id, ret);
+        save_section_footer(f, se);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+            return -1;
+        }
+        end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
+                                    end_ts_each - start_ts_each);
+    }
+
+    return ret;
+}
+
 /* Give an estimate of the amount left to be transferred,
  * the result is split into the amount for units that can and
  * for units that can't do postcopy.
diff --git a/migration/savevm.h b/migration/savevm.h
index 91ae703925..e3789984a1 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -70,5 +70,6 @@ int qemu_load_device_state(QEMUFile *f);
 int qemu_loadvm_approve_switchover(void);
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
         bool in_postcopy);
+int qemu_savevm_state_complete_multifd(QEMUFile *f);

 #endif
====

63/63 qemu:qtest+qtest-x86_64 / qtest-x86_64/migration-test      OK      164.02s   79 subtests passed
63/63 qemu:qtest+qtest-x86_64 / qtest-x86_64/migration-test      OK      164.99s   79 subtests passed
====

* Does the above patch look okay?

====
Guest not dirtying RAM:
===================
2025-03-07 10:57:28.740+0000: initiating migration
2025-03-07T10:57:28.823166Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07T10:58:04.450758Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07T10:58:04.711523Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07 10:58:05.078+0000: shutting down, reason=migrated

2025-03-07 10:59:44.322+0000: initiating migration
2025-03-07T10:59:44.394714Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07T11:00:20.198360Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07T11:00:20.279135Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07 11:00:20.571+0000: shutting down, reason=migrated
====

Guest dirtying RAM: $ dd if=/dev/urandom of=/tmp/random.bin bs=256M count=32 ...
================
2025-03-07 11:04:00.281+0000: initiating migration
2025-03-07T11:04:00.359426Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07T11:05:51.406988Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07T11:06:56.899920Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07 11:08:02.376+0000: shutting down, reason=migrated

2025-03-07 11:09:19.295+0000: initiating migration
2025-03-07T11:09:19.372012Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07T11:11:24.217610Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07T11:12:35.342709Z qemu-system-x86_64: info: multifd_ram_flush_and_sync: called
2025-03-07 11:13:48.947+0000: shutting down, reason=migrated
====

* When the guest is running a workload that dirties memory, it seems to take
longer before the migration switches over to the postcopy phase (my reading of
why is sketched below).
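
My reading of the gate in migration_iteration_run() (visible in the first hunk
above): postcopy only starts once the remaining precopy data (must_precopy)
drops to threshold_size or below *and* start_postcopy has been requested, so a
guest that keeps re-dirtying pages keeps must_precopy high and holds the
switchover back. The standalone program below only models that condition with
stand-in types and made-up numbers; it is not QEMU code.
====
/*
 * Toy model of the switchover gate; all names and values here are
 * illustrative stand-ins, not QEMU APIs.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t threshold_size;   /* data we tolerate for the final stop-and-copy */
    bool start_postcopy;       /* the user has requested postcopy */
} MigModel;

static bool should_switch_to_postcopy(const MigModel *s, uint64_t must_precopy,
                                      bool in_postcopy, bool can_switchover)
{
    /* Mirrors the condition shown in the migration.c hunk above. */
    return !in_postcopy && must_precopy <= s->threshold_size &&
           can_switchover && s->start_postcopy;
}

int main(void)
{
    MigModel s = { .threshold_size = 300 * 1024 * 1024, .start_postcopy = true };

    /* Idle guest: the pending estimate shrinks and soon passes the gate. */
    uint64_t idle[] = { (uint64_t)4096 << 20, (uint64_t)900 << 20,
                        (uint64_t)200 << 20 };
    /* Busy guest: re-dirtied pages keep the pending estimate high. */
    uint64_t busy[] = { (uint64_t)4096 << 20, (uint64_t)2500 << 20,
                        (uint64_t)1800 << 20 };

    for (size_t i = 0; i < 3; i++) {
        printf("iteration %zu: idle switches=%d, busy switches=%d\n", i,
               should_switch_to_postcopy(&s, idle[i], false, true),
               should_switch_to_postcopy(&s, busy[i], false, true));
    }
    return 0;
}
====
If that reading is right, the longer gaps between the flush_and_sync messages
in the dirtying case are just extra precopy iterations spent while the dirty
workload keeps the pending estimate above the threshold.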

> Let me know if you want me to write the patches.  That's almost the only
> thing left to make it clearer.

* If the above patch is not okay, it would help if you could share your
version of it. Meanwhile, I'll split patch 2 and rearrange the other
patches.

Thank you.
---
  - Prasad

