When sending memory pages with MSG_ZEROCOPY, it's necessary to flush to make sure all dirty pages are sent before any newer version of those pages is sent.
Currently, the flush happens every time at the end of ram_save_iterate(), which usually happens around 20x per second, due to a timeout. Change it so that it flushes only after a whole scan of the dirty bitmap, so it never sends a newer version of a page before an older one, while avoiding unnecessary overhead. Signed-off-by: Leonardo Bras <leob...@redhat.com> --- migration/multifd.h | 1 + migration/multifd.c | 58 ++++++++++++++++++++++++++------------------- migration/ram.c | 7 ++++++ 3 files changed, 41 insertions(+), 25 deletions(-) diff --git a/migration/multifd.h b/migration/multifd.h index 4d8d89e5e5..e7cbdf1fb4 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -22,6 +22,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); void multifd_recv_sync_main(void); int multifd_send_sync_main(QEMUFile *f); int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); +int multifd_zero_copy_flush(void); /* Multifd Compression flags */ #define MULTIFD_FLAG_SYNC (1 << 0) diff --git a/migration/multifd.c b/migration/multifd.c index 9282ab6aa4..ce4220a97d 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -566,10 +566,42 @@ void multifd_save_cleanup(void) multifd_send_state = NULL; } +/* + * Flush any pending zero-copy writes on every multifd channel + * + * When using zero-copy, it's necessary to flush the pages before any of + * the pages can be sent again, so we'll make sure the new version of the + * pages will always arrive _later_ than the old pages. + * + * Should be called only after we finish one whole scan of + * all the dirty bitmaps. 
+ */ +int multifd_zero_copy_flush(void) +{ + int i; + Error *local_err = NULL; + + if (!migrate_use_multifd()) { + return 0; + } + + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + int ret; + + ret = qio_channel_flush(p->c, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } + } + + return 0; +} + int multifd_send_sync_main(QEMUFile *f) { int i; - bool flush_zero_copy; if (!migrate_use_multifd()) { return 0; @@ -581,19 +613,6 @@ int multifd_send_sync_main(QEMUFile *f) } } - /* - * When using zero-copy, it's necessary to flush the pages before any of - * the pages can be sent again, so we'll make sure the new version of the - * pages will always arrive _later_ than the old pages. - * - * Currently we achieve this by flushing the zero-page requested writes - * per ram iteration, but in the future we could potentially optimize it - * to be less frequent, e.g. only after we finished one whole scanning of - * all the dirty bitmaps. 
- */ - - flush_zero_copy = migrate_use_zero_copy_send(); - for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; @@ -615,17 +634,6 @@ int multifd_send_sync_main(QEMUFile *f) ram_counters.transferred += p->packet_len; qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); - - if (flush_zero_copy && p->c) { - int ret; - Error *err = NULL; - - ret = qio_channel_flush(p->c, &err); - if (ret < 0) { - error_report_err(err); - return -1; - } - } } for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; diff --git a/migration/ram.c b/migration/ram.c index 5f5e37f64d..514584e44f 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2288,6 +2288,13 @@ static int ram_find_and_save_block(RAMState *rs) rs->last_seen_block = pss.block; rs->last_page = pss.page; + if (pss.complete_round && migrate_use_zero_copy_send()) { + int ret = multifd_zero_copy_flush(); + if (ret < 0) { + return ret; + } + } + return pages; } -- 2.36.1