1. Implements the zero page detection and handling on the multifd
threads for non-compression, zlib and zstd compression backends.
2. Added a new value 'multifd' in ZeroPageDetection enumeration.
3. Add proper asserts to ensure pages->normal are used for normal pages
in all scenarios.

Signed-off-by: Hao Xiang <hao.xi...@bytedance.com>
---
 migration/meson.build         |  1 +
 migration/multifd-zero-page.c | 59 +++++++++++++++++++++++++++++++++++
 migration/multifd-zlib.c      | 26 ++++++++++++---
 migration/multifd-zstd.c      | 25 ++++++++++++---
 migration/multifd.c           | 50 +++++++++++++++++++++++------
 migration/multifd.h           |  7 +++++
 qapi/migration.json           |  4 ++-
 7 files changed, 151 insertions(+), 21 deletions(-)
 create mode 100644 migration/multifd-zero-page.c

diff --git a/migration/meson.build b/migration/meson.build
index 92b1cc4297..1eeb915ff6 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -22,6 +22,7 @@ system_ss.add(files(
   'migration.c',
   'multifd.c',
   'multifd-zlib.c',
+  'multifd-zero-page.c',
   'ram-compress.c',
   'options.c',
   'postcopy-ram.c',
diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c
new file mode 100644
index 0000000000..f0cd8e2c53
--- /dev/null
+++ b/migration/multifd-zero-page.c
@@ -0,0 +1,59 @@
+/*
+ * Multifd zero page detection implementation.
+ *
+ * Copyright (c) 2024 Bytedance Inc
+ *
+ * Authors:
+ *  Hao Xiang <hao.xi...@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "exec/ramblock.h"
+#include "migration.h"
+#include "multifd.h"
+#include "options.h"
+#include "ram.h"
+
+void multifd_zero_page_check_send(MultiFDSendParams *p)
+{
+    /*
+     * QEMU older than 9.0 don't understand zero page
+     * on multifd channel. This switch is required to
+     * maintain backward compatibility.
+     */
+    bool use_multifd_zero_page =
+        (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD);
+    MultiFDPages_t *pages = p->pages;
+    RAMBlock *rb = pages->block;
+
+    assert(pages->num != 0);
+    assert(pages->normal_num == 0);
+    assert(pages->zero_num == 0);
+
+    for (int i = 0; i < pages->num; i++) {
+        uint64_t offset = pages->offset[i];
+        if (use_multifd_zero_page &&
+            buffer_is_zero(rb->host + offset, p->page_size)) {
+            pages->zero[pages->zero_num] = offset;
+            pages->zero_num++;
+            ram_release_page(rb->idstr, offset);
+        } else {
+            pages->normal[pages->normal_num] = offset;
+            pages->normal_num++;
+        }
+    }
+}
+
+void multifd_zero_page_check_recv(MultiFDRecvParams *p)
+{
+    for (int i = 0; i < p->zero_num; i++) {
+        void *page = p->host + p->zero[i];
+        if (!buffer_is_zero(page, p->page_size)) {
+            memset(page, 0, p->page_size);
+        }
+    }
+}
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index 012e3bdea1..cdfe0fa70e 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -123,13 +123,20 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error 
**errp)
     int ret;
     uint32_t i;
 
+    multifd_zero_page_check_send(p);
+
+    if (!pages->normal_num) {
+        p->next_packet_size = 0;
+        goto out;
+    }
+
     multifd_send_prepare_header(p);
 
-    for (i = 0; i < pages->num; i++) {
+    for (i = 0; i < pages->normal_num; i++) {
         uint32_t available = z->zbuff_len - out_size;
         int flush = Z_NO_FLUSH;
 
-        if (i == pages->num - 1) {
+        if (i == pages->normal_num - 1) {
             flush = Z_SYNC_FLUSH;
         }
 
@@ -138,7 +145,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error 
**errp)
          * with compression. zlib does not guarantee that this is safe,
          * therefore copy the page before calling deflate().
          */
-        memcpy(z->buf, p->pages->block->host + pages->offset[i], p->page_size);
+        memcpy(z->buf, p->pages->block->host + pages->normal[i], p->page_size);
         zs->avail_in = p->page_size;
         zs->next_in = z->buf;
 
@@ -172,10 +179,10 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error 
**errp)
     p->iov[p->iovs_num].iov_len = out_size;
     p->iovs_num++;
     p->next_packet_size = out_size;
-    p->flags |= MULTIFD_FLAG_ZLIB;
 
+out:
+    p->flags |= MULTIFD_FLAG_ZLIB;
     multifd_send_fill_packet(p);
-
     return 0;
 }
 
@@ -261,6 +268,14 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error 
**errp)
                    p->id, flags, MULTIFD_FLAG_ZLIB);
         return -1;
     }
+
+    multifd_zero_page_check_recv(p);
+
+    if (!p->normal_num) {
+        assert(in_size == 0);
+        return 0;
+    }
+
     ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp);
 
     if (ret != 0) {
@@ -310,6 +325,7 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error 
**errp)
                    p->id, out_size, expected_size);
         return -1;
     }
+
     return 0;
 }
 
diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
index dc8fe43e94..27a1eba075 100644
--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
@@ -118,19 +118,26 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error 
**errp)
     int ret;
     uint32_t i;
 
+    multifd_zero_page_check_send(p);
+
+    if (!pages->normal_num) {
+        p->next_packet_size = 0;
+        goto out;
+    }
+
     multifd_send_prepare_header(p);
 
     z->out.dst = z->zbuff;
     z->out.size = z->zbuff_len;
     z->out.pos = 0;
 
-    for (i = 0; i < pages->num; i++) {
+    for (i = 0; i < pages->normal_num; i++) {
         ZSTD_EndDirective flush = ZSTD_e_continue;
 
-        if (i == pages->num - 1) {
+        if (i == pages->normal_num - 1) {
             flush = ZSTD_e_flush;
         }
-        z->in.src = p->pages->block->host + pages->offset[i];
+        z->in.src = p->pages->block->host + pages->normal[i];
         z->in.size = p->page_size;
         z->in.pos = 0;
 
@@ -161,10 +168,10 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error 
**errp)
     p->iov[p->iovs_num].iov_len = z->out.pos;
     p->iovs_num++;
     p->next_packet_size = z->out.pos;
-    p->flags |= MULTIFD_FLAG_ZSTD;
 
+out:
+    p->flags |= MULTIFD_FLAG_ZSTD;
     multifd_send_fill_packet(p);
-
     return 0;
 }
 
@@ -257,6 +264,14 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error 
**errp)
                    p->id, flags, MULTIFD_FLAG_ZSTD);
         return -1;
     }
+
+    multifd_zero_page_check_recv(p);
+
+    if (!p->normal_num) {
+        assert(in_size == 0);
+        return 0;
+    }
+
     ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp);
 
     if (ret != 0) {
diff --git a/migration/multifd.c b/migration/multifd.c
index a33dba40d9..fbb40ea10b 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -11,6 +11,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "qemu/rcu.h"
 #include "exec/target_page.h"
 #include "sysemu/sysemu.h"
@@ -126,6 +127,8 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error 
**errp)
     MultiFDPages_t *pages = p->pages;
     int ret;
 
+    multifd_zero_page_check_send(p);
+
     if (!use_zero_copy_send) {
         /*
          * Only !zerocopy needs the header in IOV; zerocopy will
@@ -134,13 +137,13 @@ static int nocomp_send_prepare(MultiFDSendParams *p, 
Error **errp)
         multifd_send_prepare_header(p);
     }
 
-    for (int i = 0; i < pages->num; i++) {
-        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
+    for (int i = 0; i < pages->normal_num; i++) {
+        p->iov[p->iovs_num].iov_base = pages->block->host + pages->normal[i];
         p->iov[p->iovs_num].iov_len = p->page_size;
         p->iovs_num++;
     }
 
-    p->next_packet_size = pages->num * p->page_size;
+    p->next_packet_size = pages->normal_num * p->page_size;
     p->flags |= MULTIFD_FLAG_NOCOMP;
 
     multifd_send_fill_packet(p);
@@ -202,6 +205,13 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, Error 
**errp)
                    p->id, flags, MULTIFD_FLAG_NOCOMP);
         return -1;
     }
+
+    multifd_zero_page_check_recv(p);
+
+    if (!p->normal_num) {
+        return 0;
+    }
+
     for (int i = 0; i < p->normal_num; i++) {
         p->iov[i].iov_base = p->host + p->normal[i];
         p->iov[i].iov_len = p->page_size;
@@ -339,7 +349,7 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
 
     packet->flags = cpu_to_be32(p->flags);
     packet->pages_alloc = cpu_to_be32(p->pages->allocated);
-    packet->normal_pages = cpu_to_be32(pages->num);
+    packet->normal_pages = cpu_to_be32(pages->normal_num);
     packet->zero_pages = cpu_to_be32(pages->zero_num);
     packet->next_packet_size = cpu_to_be32(p->next_packet_size);
 
@@ -350,18 +360,25 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
         strncpy(packet->ramblock, pages->block->idstr, 256);
     }
 
-    for (i = 0; i < pages->num; i++) {
+    for (i = 0; i < pages->normal_num; i++) {
         /* there are architectures where ram_addr_t is 32 bit */
-        uint64_t temp = pages->offset[i];
+        uint64_t temp = pages->normal[i];
 
         packet->offset[i] = cpu_to_be64(temp);
     }
 
+    for (i = 0; i < pages->zero_num; i++) {
+        /* there are architectures where ram_addr_t is 32 bit */
+        uint64_t temp = pages->zero[i];
+
+        packet->offset[pages->normal_num + i] = cpu_to_be64(temp);
+    }
+
     p->packets_sent++;
-    p->total_normal_pages += pages->num;
+    p->total_normal_pages += pages->normal_num;
     p->total_zero_pages += pages->zero_num;
 
-    trace_multifd_send(p->id, packet_num, pages->num, pages->zero_num,
+    trace_multifd_send(p->id, packet_num, pages->normal_num, pages->zero_num,
                        p->flags, p->next_packet_size);
 }
 
@@ -451,6 +468,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams 
*p, Error **errp)
         p->normal[i] = offset;
     }
 
+    for (i = 0; i < p->zero_num; i++) {
+        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
+
+        if (offset > (p->block->used_length - p->page_size)) {
+            error_setg(errp, "multifd: offset too long %" PRIu64
+                       " (max " RAM_ADDR_FMT ")",
+                       offset, p->block->used_length);
+            return -1;
+        }
+        p->zero[i] = offset;
+    }
+
     return 0;
 }
 
@@ -842,7 +871,7 @@ static void *multifd_send_thread(void *opaque)
 
             stat64_add(&mig_stats.multifd_bytes,
                        p->next_packet_size + p->packet_len);
-            stat64_add(&mig_stats.normal_pages, pages->num);
+            stat64_add(&mig_stats.normal_pages, pages->normal_num);
             stat64_add(&mig_stats.zero_pages, pages->zero_num);
 
             multifd_pages_reset(p->pages);
@@ -1256,7 +1285,8 @@ static void *multifd_recv_thread(void *opaque)
         p->flags &= ~MULTIFD_FLAG_SYNC;
         qemu_mutex_unlock(&p->mutex);
 
-        if (p->normal_num) {
+        if (p->normal_num + p->zero_num) {
+            assert(!(flags & MULTIFD_FLAG_SYNC));
             ret = multifd_recv_state->ops->recv_pages(p, &local_err);
             if (ret != 0) {
                 break;
diff --git a/migration/multifd.h b/migration/multifd.h
index 9822ff298a..125f0bbe60 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -53,6 +53,11 @@ typedef struct {
     uint32_t unused32[1];    /* Reserved for future use */
     uint64_t unused64[3];    /* Reserved for future use */
     char ramblock[256];
+    /*
+     * This array contains the pointers to:
+     *  - normal pages (initial normal_pages entries)
+     *  - zero pages (following zero_pages entries)
+     */
     uint64_t offset[];
 } __attribute__((packed)) MultiFDPacket_t;
 
@@ -224,6 +229,8 @@ typedef struct {
 
 void multifd_register_ops(int method, MultiFDMethods *ops);
 void multifd_send_fill_packet(MultiFDSendParams *p);
+void multifd_zero_page_check_send(MultiFDSendParams *p);
+void multifd_zero_page_check_recv(MultiFDRecvParams *p);
 
 static inline void multifd_send_prepare_header(MultiFDSendParams *p)
 {
diff --git a/qapi/migration.json b/qapi/migration.json
index 99843a8e95..e2450b92d4 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -660,9 +660,11 @@
 #
 # @none: Do not perform zero page checking.
 #
+# @multifd: Perform zero page checking on the multifd sender thread. (since 
9.0)
+#
 ##
 { 'enum': 'ZeroPageDetection',
-  'data': [ 'legacy', 'none' ] }
+  'data': [ 'legacy', 'none', 'multifd' ] }
 
 ##
 # @BitmapMigrationBitmapAliasTransform:
-- 
2.30.2


Reply via email to