On Thu, Jan 25, 2024 at 3:14 PM Fabiano Rosas <faro...@suse.de> wrote: > > Hao Xiang <hao.xi...@bytedance.com> writes: > > > On Mon, Jan 22, 2024 at 8:28 PM Hao Xiang <hao.xi...@bytedance.com> wrote: > >> > >> On Thu, Nov 16, 2023 at 7:14 AM Fabiano Rosas <faro...@suse.de> wrote: > >> > > >> > Hao Xiang <hao.xi...@bytedance.com> writes: > >> > > >> > > From: Juan Quintela <quint...@redhat.com> > >> > > > >> > > Signed-off-by: Juan Quintela <quint...@redhat.com> > >> > > Reviewed-by: Leonardo Bras <leob...@redhat.com> > >> > > --- > >> > > migration/multifd.c | 7 ++++--- > >> > > migration/options.c | 13 +++++++------ > >> > > migration/ram.c | 45 ++++++++++++++++++++++++++++++++++++++------- > >> > > qapi/migration.json | 1 - > >> > > 4 files changed, 49 insertions(+), 17 deletions(-) > >> > > > >> > > diff --git a/migration/multifd.c b/migration/multifd.c > >> > > index 1b994790d5..1198ffde9c 100644 > >> > > --- a/migration/multifd.c > >> > > +++ b/migration/multifd.c > >> > > @@ -13,6 +13,7 @@ > >> > > #include "qemu/osdep.h" > >> > > #include "qemu/cutils.h" > >> > > #include "qemu/rcu.h" > >> > > +#include "qemu/cutils.h" > >> > > #include "exec/target_page.h" > >> > > #include "sysemu/sysemu.h" > >> > > #include "exec/ramblock.h" > >> > > @@ -459,7 +460,6 @@ static int multifd_send_pages(QEMUFile *f) > >> > > p->packet_num = multifd_send_state->packet_num++; > >> > > multifd_send_state->pages = p->pages; > >> > > p->pages = pages; > >> > > - > >> > > qemu_mutex_unlock(&p->mutex); > >> > > qemu_sem_post(&p->sem); > >> > > > >> > > @@ -684,7 +684,7 @@ static void *multifd_send_thread(void *opaque) > >> > > MigrationThread *thread = NULL; > >> > > Error *local_err = NULL; > >> > > /* qemu older than 8.2 don't understand zero page on multifd > >> > > channel */ > >> > > - bool use_zero_page = !migrate_use_main_zero_page(); > >> > > + bool use_multifd_zero_page = !migrate_use_main_zero_page(); > >> > > int ret = 0; > >> > > bool use_zero_copy_send = 
migrate_zero_copy_send(); > >> > > > >> > > @@ -713,6 +713,7 @@ static void *multifd_send_thread(void *opaque) > >> > > RAMBlock *rb = p->pages->block; > >> > > uint64_t packet_num = p->packet_num; > >> > > uint32_t flags; > >> > > + > >> > > p->normal_num = 0; > >> > > p->zero_num = 0; > >> > > > >> > > @@ -724,7 +725,7 @@ static void *multifd_send_thread(void *opaque) > >> > > > >> > > for (int i = 0; i < p->pages->num; i++) { > >> > > uint64_t offset = p->pages->offset[i]; > >> > > - if (use_zero_page && > >> > > + if (use_multifd_zero_page && > >> > > >> > We could have a new function in multifd_ops for zero page > >> > handling. We're already considering an accelerator for the compression > >> > method in the other series[1] and in this series we're adding an > >> > accelerator for zero page checking. It's about time we make the > >> > multifd_ops generic instead of only compression/no compression. > >> > >> Sorry I overlooked this email earlier. > >> I will extend the multifd_ops interface and add a new API for zero > >> page checking. 
> >> > >> > > >> > 1- [PATCH v2 0/4] Live Migration Acceleration with IAA Compression > >> > https://lore.kernel.org/r/20231109154638.488213-1-yuan1....@intel.com > >> > > >> > > buffer_is_zero(rb->host + offset, p->page_size)) { > >> > > p->zero[p->zero_num] = offset; > >> > > p->zero_num++; > >> > > diff --git a/migration/options.c b/migration/options.c > >> > > index 00c0c4a0d6..97d121d4d7 100644 > >> > > --- a/migration/options.c > >> > > +++ b/migration/options.c > >> > > @@ -195,6 +195,7 @@ Property migration_properties[] = { > >> > > DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), > >> > > DEFINE_PROP_MIG_CAP("x-return-path", > >> > > MIGRATION_CAPABILITY_RETURN_PATH), > >> > > DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), > >> > > + DEFINE_PROP_MIG_CAP("x-main-zero-page", > >> > > MIGRATION_CAPABILITY_MAIN_ZERO_PAGE), > >> > > DEFINE_PROP_MIG_CAP("x-background-snapshot", > >> > > MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), > >> > > #ifdef CONFIG_LINUX > >> > > @@ -288,13 +289,9 @@ bool migrate_multifd(void) > >> > > > >> > > bool migrate_use_main_zero_page(void) > >> > > { > >> > > - //MigrationState *s; > >> > > - > >> > > - //s = migrate_get_current(); > >> > > + MigrationState *s = migrate_get_current(); > >> > > > >> > > - // We will enable this when we add the right code. > >> > > - // return > >> > > s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; > >> > > - return true; > >> > > + return s->capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]; > >> > > >> > What happens if we disable main-zero-page while multifd is not enabled? > >> > >> In ram.c > >> ... > >> if (migrate_multifd() && !migrate_use_main_zero_page()) { > >> migration_ops->ram_save_target_page = ram_save_target_page_multifd; > >> } else { > >> migration_ops->ram_save_target_page = ram_save_target_page_legacy; > >> } > >> ... 
> >> > >> So if main-zero-page is disabled and multifd is also disabled, it will > >> go with the "else" path, which is the legacy path > >> ram_save_target_page_legacy() and do zero page checking from the main > >> thread. > >> > >> > > >> > > } > >> > > > >> > > bool migrate_pause_before_switchover(void) > >> > > @@ -457,6 +454,7 @@ > >> > > INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, > >> > > MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, > >> > > MIGRATION_CAPABILITY_RETURN_PATH, > >> > > MIGRATION_CAPABILITY_MULTIFD, > >> > > + MIGRATION_CAPABILITY_MAIN_ZERO_PAGE, > >> > > MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, > >> > > MIGRATION_CAPABILITY_AUTO_CONVERGE, > >> > > MIGRATION_CAPABILITY_RELEASE_RAM, > >> > > @@ -534,6 +532,9 @@ bool migrate_caps_check(bool *old_caps, bool > >> > > *new_caps, Error **errp) > >> > > error_setg(errp, "Postcopy is not yet compatible with > >> > > multifd"); > >> > > return false; > >> > > } > >> > > + if (new_caps[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE]) { > >> > > + error_setg(errp, "Postcopy is not yet compatible with > >> > > main zero copy"); > >> > > + } > >> > > >> > Won't this break compatibility for postcopy? A command that used > >> > to work now will have to disable main-zero-page first. > >> > >> main-zero-page is disabled by default. > >> > >> > > >> > > } > >> > > > >> > > if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { > >> > > diff --git a/migration/ram.c b/migration/ram.c > >> > > index 8c7886ab79..f7a42feff2 100644 > >> > > --- a/migration/ram.c > >> > > +++ b/migration/ram.c > >> > > @@ -2059,17 +2059,42 @@ static int > >> > > ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) > >> > > if (save_zero_page(rs, pss, offset)) { > >> > > return 1; > >> > > } > >> > > - > >> > > /* > >> > > - * Do not use multifd in postcopy as one whole host page should be > >> > > - * placed. 
Meanwhile postcopy requires atomic update of pages, > >> > > so even > >> > > - * if host page size == guest page size the dest guest during run > >> > > may > >> > > - * still see partially copied pages which is data corruption. > >> > > + * Do not use multifd for: > >> > > + * 1. Compression as the first page in the new block should be > >> > > posted out > >> > > + * before sending the compressed page > >> > > + * 2. In postcopy as one whole host page should be placed > >> > > */ > >> > > - if (migrate_multifd() && !migration_in_postcopy()) { > >> > > + if (!migrate_compress() && migrate_multifd() && > >> > > !migration_in_postcopy()) { > >> > > + return ram_save_multifd_page(pss->pss_channel, block, offset); > >> > > + } > >> > > >> > This could go into ram_save_target_page_multifd like so: > >> > > >> > if (!migrate_compress() && !migration_in_postcopy() && > >> > !migration_main_zero_page()) { > >> > return ram_save_multifd_page(pss->pss_channel, block, offset); > >> > } else { > >> > return ram_save_target_page_legacy(); > >> > } > >> > > >> > > + > >> > > + return ram_save_page(rs, pss); > >> > > +} > >> > > + > >> > > +/** > >> > > + * ram_save_target_page_multifd: save one target page > >> > > + * > >> > > + * Returns the number of pages written > >> > > + * > >> > > + * @rs: current RAM state > >> > > + * @pss: data about the page we want to send > >> > > + */ > >> > > +static int ram_save_target_page_multifd(RAMState *rs, > >> > > PageSearchStatus *pss) > >> > > +{ > >> > > + RAMBlock *block = pss->block; > >> > > + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; > >> > > + int res; > >> > > + > >> > > + if (!migration_in_postcopy()) { > >> > > return ram_save_multifd_page(pss->pss_channel, block, offset); > >> > > } > >> > > > >> > > + res = save_zero_page(rs, pss, offset); > >> > > + if (res > 0) { > >> > > + return res; > >> > > + } > >> > > + > >> > > return ram_save_page(rs, pss); > >> > > } > >> > > > >> > > @@ -2982,9 +3007,15 @@ 
static int ram_save_setup(QEMUFile *f, void > >> > > *opaque) > >> > > } > >> > > > >> > > migration_ops = g_malloc0(sizeof(MigrationOps)); > >> > > - migration_ops->ram_save_target_page = ram_save_target_page_legacy; > >> > > + > >> > > + if (migrate_multifd() && !migrate_use_main_zero_page()) { > >> > > + migration_ops->ram_save_target_page = > >> > > ram_save_target_page_multifd; > >> > > + } else { > >> > > + migration_ops->ram_save_target_page = > >> > > ram_save_target_page_legacy; > >> > > + } > >> > > >> > This should not check main-zero-page. Just have multifd vs. legacy and > >> > have the multifd function defer to _legacy if main-zero-page or > >> > in_postcopy. > >> > >> I noticed that ram_save_target_page_legacy and > >> ram_save_target_page_multifd have a lot of overlap and are quite > >> confusing. I can refactor this path and take-in your comments here. > >> > >> 1) Remove ram_save_multifd_page() call from > >> ram_save_target_page_legacy(). ram_save_multifd_page() will only be > >> called in ram_save_target_page_multifd(). > >> 2) Remove save_zero_page() and ram_save_page() from > >> ram_save_target_page_multifd(). > >> 3) Postcopy will always go with the ram_save_target_page_legacy() path. > >> 4) Legacy compression will always go with the > >> ram_save_target_page_legacy() path. > >> 5) Call ram_save_target_page_legacy() from within > >> ram_save_target_page_multifd() if postcopy or legacy compression. > >> > > > > Hi Fabiano, > > So I spent some time reading the > > ram_save_target_page_legacy/ram_save_target_page_multifd code path > > Juan wrote and here is my current understanding: > > 1) Multifd and legacy compression are not compatible. > > 2) Multifd and postcopy are not compatible. > > The compatibility checks are implemented in migrate_caps_check(). So > > there is really no need to handle a lot of the complexity in Juan's > > code. > > > > I think what we can do is: > > 1) If multifd is enabled, use ram_save_target_page_multifd(). 
> > Otherwise, use ram_save_target_page_legacy(). > > 2) In ram_save_target_page_legacy(), we don't need the special path to > > call ram_save_multifd_page(). That can be handled by > > ram_save_target_page_multifd() alone. > > 3) In ram_save_target_page_multifd(), we assert that legacy > > compression is not enabled. And we also assert that postcopy is > > not enabled. > > 4) We do need backward compatibility support for the main zero page > > checking case in multifd. So in ram_save_target_page_multifd(), we > > call save_zero_page() if migrate_multifd_zero_page() is false. > > > > Sounds good. Could you apply those changes and the capability we > discussed in the other message and send a separate series? I haven't > found the time to work on this yet. >
Sure, I will send out a separate series for multifd zero page checking.