On Sun, Jan 14, 2024 at 11:01 PM Shivam Kumar <shivam.kum...@nutanix.com> wrote:
>
>
>
> > On 04-Jan-2024, at 6:14 AM, Hao Xiang <hao.xi...@bytedance.com> wrote:
> >
> > From: Juan Quintela <quint...@redhat.com>
> >
> > This implements the zero page dection and handling.
> >
> > Signed-off-by: Juan Quintela <quint...@redhat.com>
> > ---
> > migration/multifd.c | 41 +++++++++++++++++++++++++++++++++++++++--
> > migration/multifd.h |  5 +++++
> > 2 files changed, 44 insertions(+), 2 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index 5a1f50c7e8..756673029d 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -11,6 +11,7 @@
> >   */
> >
> >  #include "qemu/osdep.h"
> > +#include "qemu/cutils.h"
> >  #include "qemu/rcu.h"
> >  #include "exec/target_page.h"
> >  #include "sysemu/sysemu.h"
> > @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
> >
> >          packet->offset[i] = cpu_to_be64(temp);
> >      }
> > +    for (i = 0; i < p->zero_num; i++) {
> > +        /* there are architectures where ram_addr_t is 32 bit */
> > +        uint64_t temp = p->zero[i];
> > +
> > +        packet->offset[p->normal_num + i] = cpu_to_be64(temp);
> > +    }
> >  }
> >
> >  static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
> > @@ -361,6 +368,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
> >          p->normal[i] = offset;
> >      }
> >
> > +    for (i = 0; i < p->zero_num; i++) {
> > +        uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
> > +
> > +        if (offset > (p->block->used_length - p->page_size)) {
> > +            error_setg(errp, "multifd: offset too long %" PRIu64
> > +                       " (max " RAM_ADDR_FMT ")",
> > +                       offset, p->block->used_length);
> > +            return -1;
> > +        }
> > +        p->zero[i] = offset;
> > +    }
> > +
> >      return 0;
> >  }
> >
> > @@ -664,6 +683,8 @@ static void *multifd_send_thread(void *opaque)
> >      MultiFDSendParams *p = opaque;
> >      MigrationThread *thread = NULL;
> >      Error *local_err = NULL;
> > +    /* qemu older than 8.2 don't understand zero page on multifd channel */
> > +    bool use_zero_page = !migrate_use_main_zero_page();
> >      int ret = 0;
> >      bool use_zero_copy_send = migrate_zero_copy_send();
> >
> > @@ -689,6 +710,7 @@
> >          qemu_mutex_lock(&p->mutex);
> >
> >          if (p->pending_job) {
> > +            RAMBlock *rb = p->pages->block;
> >              uint64_t packet_num = p->packet_num;
> >              uint32_t flags;
> >              p->normal_num = 0;
> > @@ -701,8 +723,16 @@
> >              }
> >
> >              for (int i = 0; i < p->pages->num; i++) {
> > -                p->normal[p->normal_num] = p->pages->offset[i];
> > -                p->normal_num++;
> > +                uint64_t offset = p->pages->offset[i];
> > +                if (use_zero_page &&
> > +                    buffer_is_zero(rb->host + offset, p->page_size)) {
> > +                    p->zero[p->zero_num] = offset;
> > +                    p->zero_num++;
> > +                    ram_release_page(rb->idstr, offset);
> > +                } else {
> > +                    p->normal[p->normal_num] = offset;
> > +                    p->normal_num++;
> > +                }
> >              }
> >
> >              if (p->normal_num) {
> > @@ -1155,6 +1185,13 @@ static void *multifd_recv_thread(void *opaque)
> >              }
> >          }
> >
> > +        for (int i = 0; i < p->zero_num; i++) {
> > +            void *page = p->host + p->zero[i];
> > +            if (!buffer_is_zero(page, p->page_size)) {
> > +                memset(page, 0, p->page_size);
> > +            }
> > +        }
> > +
> I am wondering if zeroing the zero page on the destination can also be
> offloaded to DSA. Can it help in reducing cpu consumption on the destination
> in case of multifd-based migration?
I have that change coded up and I have tested it for performance. It
doesn't save as many CPU cycles as I hoped. The problem is that on the
sender side we run zero page detection on every single page, but on the
receiver side we only zero the pages the sender tells us about. For
instance, if all 128 pages in a multifd packet are zero pages, we can
offload zeroing the entire 128 pages in a single DSA operation, and
that's the best case. In a worse case, if a multifd packet of 128 pages
has, say, only 10 zero pages, we can offload zeroing only those 10
pages instead of the entire 128. Considering the DSA software overhead,
that is not a good deal. In the future, if we refactor the multifd path
to put zero pages in their own packets, it will be a different story.
For instance, if 1024 non-contiguous zero pages are detected today,
they are spread across multiple packets. With a new packet format,
those pages can all go into the same packet (and we can put more than
1024 zero pages in a packet), and DSA offloading becomes much more
effective. We can add that functionality once the new packet format is
implemented. (A rough sketch of the batching idea is at the bottom of
this mail.)

> >          if (flags & MULTIFD_FLAG_SYNC) {
> >              qemu_sem_post(&multifd_recv_state->sem_sync);
> >              qemu_sem_wait(&p->sem_sync);
> > diff --git a/migration/multifd.h b/migration/multifd.h
> > index d587b0e19c..13762900d4 100644
> > --- a/migration/multifd.h
> > +++ b/migration/multifd.h
> > @@ -53,6 +53,11 @@ typedef struct {
> >      uint32_t unused32[1];    /* Reserved for future use */
> >      uint64_t unused64[3];    /* Reserved for future use */
> >      char ramblock[256];
> > +    /*
> > +     * This array contains the pointers to:
> > +     *  - normal pages (initial normal_pages entries)
> > +     *  - zero pages (following zero_pages entries)
> > +     */
> >      uint64_t offset[];
> >  } __attribute__((packed)) MultiFDPacket_t;
> >
> > --
> > 2.30.2
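
For illustration only (this is not code from the patch above or from the
DSA series): a minimal sketch of how the receiver side could batch the
zero pages of one multifd packet. offload_fill_zero() is a hypothetical
stand-in for submitting a memory-fill job to an accelerator, and the
sketch skips the buffer_is_zero() check the patch does before dirtying a
page. The point is that one offloaded operation can only cover a run of
contiguous zero pages, so the offload pays off only when a packet carries
many of them.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical helper: submit one memory-fill (zero) job to the accelerator. */
void offload_fill_zero(void *dst, size_t len);

static void recv_zero_pages_batched(uint8_t *host, const uint64_t *zero,
                                    uint32_t zero_num, uint32_t page_size)
{
    uint32_t i = 0;

    while (i < zero_num) {
        uint64_t start = zero[i];
        uint64_t len = page_size;
        uint32_t j = i + 1;

        /* Coalesce offsets that happen to be contiguous into one run. */
        while (j < zero_num && zero[j] == start + len) {
            len += page_size;
            j++;
        }

        if (len > page_size) {
            /* One offloaded fill covers the whole run of pages. */
            offload_fill_zero(host + start, len);
        } else {
            /* A lone page is cheaper to clear on the CPU. */
            memset(host + start, 0, page_size);
        }
        i = j;
    }
}

With the current packet format a 128-page packet rarely holds long runs
of zero pages, so most iterations fall back to the memset path; a packet
format that carries only zero pages would make the offloaded branch the
common case.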