Hi Prasad, On 2025-02-15 18:01, Prasad Pandit wrote: > From: Prasad Pandit <p...@fedoraproject.org> > > Enable Multifd and Postcopy migration together. > The migration_ioc_process_incoming() routine > checks magic value sent on each channel and > helps to properly setup multifd and postcopy > channels. > > The Precopy and Multifd threads work during the > initial guest RAM transfer. When migration moves > to the Postcopy phase, the multifd threads are > shutdown and Postcopy threads on the destination > request/pull data from the source side. > > Signed-off-by: Prasad Pandit <p...@fedoraproject.org> > --- > migration/migration.c | 107 ++++++++++++++++++++----------------- > migration/multifd-nocomp.c | 3 +- > migration/multifd.c | 4 +- > migration/options.c | 5 -- > migration/ram.c | 7 ++- > 5 files changed, 64 insertions(+), 62 deletions(-) > > v6: > - Shutdown multifd threads before postcopy_start() > - Reorder tests/qtest/migration/ patches > - Some refactoring of functions > > v5: > - > https://lore.kernel.org/qemu-devel/20250205122712.229151-1-ppan...@redhat.com/T/#t > > diff --git a/migration/migration.c b/migration/migration.c > index 396928513a..38697182e8 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -95,6 +95,9 @@ enum mig_rp_message_type { > MIG_RP_MSG_MAX > }; > > +/* Migration channel types */ > +enum { CH_MAIN, CH_MULTIFD, CH_POSTCOPY }; > + > /* When we add fault tolerance, we could have several > migrations at once. For now we don't need to add > dynamic creation of migration */ > @@ -959,28 +962,19 @@ void migration_fd_process_incoming(QEMUFile *f) > migration_incoming_process(); > } > > -/* > - * Returns true when we want to start a new incoming migration process, > - * false otherwise. > - */ > -static bool migration_should_start_incoming(bool main_channel) > +static bool migration_has_main_and_multifd_channels(void) > { > - /* Multifd doesn't start unless all channels are established */ > - if (migrate_multifd()) { > - return migration_has_all_channels(); > + MigrationIncomingState *mis = migration_incoming_get_current(); > + if (!mis->from_src_file) { > + /* main channel not established */ > + return false; > } > > - /* Preempt channel only starts when the main channel is created */ > - if (migrate_postcopy_preempt()) { > - return main_channel; > + if (migrate_multifd() && !multifd_recv_all_channels_created()) { > + return false; > } > > - /* > - * For all the rest types of migration, we should only reach here when > - * it's the main channel that's being created, and we should always > - * proceed with this channel. > - */ > - assert(main_channel); > + /* main channel and all multifd channels are established */ > return true; > } > > @@ -989,13 +983,12 @@ void migration_ioc_process_incoming(QIOChannel *ioc, > Error **errp) > MigrationIncomingState *mis = migration_incoming_get_current(); > Error *local_err = NULL; > QEMUFile *f; > - bool default_channel = true; > uint32_t channel_magic = 0; > + uint8_t channel = CH_MAIN; > int ret = 0; > > - if (migrate_multifd() && !migrate_mapped_ram() && > - !migrate_postcopy_ram() && > - qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { > + if (!migration_has_main_and_multifd_channels()) { > + if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) > { > /* > * With multiple channels, it is possible that we receive channels > * out of order on destination side, causing incorrect mapping of > @@ -1006,42 +999,58 @@ void migration_ioc_process_incoming(QIOChannel *ioc, > Error **errp) > * tls handshake while initializing main channel so with tls this > * issue is not possible. > */ > - ret = migration_channel_read_peek(ioc, (void *)&channel_magic, > - sizeof(channel_magic), errp); > + ret = migration_channel_read_peek(ioc, (void *)&channel_magic, > + sizeof(channel_magic), errp); > + if (ret != 0) { > + return; > + } > > - if (ret != 0) { > - return; > + if (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)) { > + channel = CH_MAIN; > + } else if (channel_magic == cpu_to_be32(MULTIFD_MAGIC)) { > + channel = CH_MULTIFD; > + } else if (!mis->from_src_file > + && mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) { > + /* reconnect default channel for postcopy recovery */ > + channel = CH_MAIN; > + } else { > + error_report("%s: unknown channel magic: %u", > + __func__, channel_magic);
Here, the number reported in the error will have incorrect endianness on a non-BE system. I think it would be better to convert channel_magic to the system endianness right after reading it. On top of that, then there is no need to convert constants with magic numbers when comparing. > + return; > + } > + } else if (mis->from_src_file > + && (!strcmp(ioc->name, "migration-tls-incoming") > + || !strcmp(ioc->name, "migration-file-incoming"))) { > + channel = CH_MULTIFD; > } > - > - default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); > - } else { > - default_channel = !mis->from_src_file; > + } else if (mis->from_src_file) { > + channel = CH_POSTCOPY; > } > > if (multifd_recv_setup(errp) != 0) { > return; > } > > - if (default_channel) { > + if (channel == CH_MAIN) { > f = qemu_file_new_input(ioc); > migration_incoming_setup(f); > - } else { > + } else if (channel == CH_MULTIFD) { > /* Multiple connections */ > - assert(migration_needs_multiple_sockets()); > if (migrate_multifd()) { > multifd_recv_new_channel(ioc, &local_err); > - } else { > - assert(migrate_postcopy_preempt()); > - f = qemu_file_new_input(ioc); > - postcopy_preempt_new_channel(mis, f); > } > if (local_err) { > error_propagate(errp, local_err); > return; > } > + } else if (channel == CH_POSTCOPY) { > + assert(migrate_postcopy_preempt()); > + assert(!mis->postcopy_qemufile_dst); > + f = qemu_file_new_input(ioc); > + postcopy_preempt_new_channel(mis, f); > } > > - if (migration_should_start_incoming(default_channel)) { > + if (channel != CH_POSTCOPY && migration_has_main_and_multifd_channels()) > { > /* If it's a recovery, we're done */ > if (postcopy_try_recover()) { > return; > @@ -1058,20 +1067,15 @@ void migration_ioc_process_incoming(QIOChannel *ioc, > Error **errp) > */ > bool migration_has_all_channels(void) > { > + if (!migration_has_main_and_multifd_channels()) { > + return false; > + } > + > MigrationIncomingState *mis = migration_incoming_get_current(); > - > - if (!mis->from_src_file) { > + if (migrate_postcopy_preempt() && !mis->postcopy_qemufile_dst) { > return false; > } > > - if (migrate_multifd()) { > - return multifd_recv_all_channels_created(); > - } > - > - if (migrate_postcopy_preempt()) { > - return mis->postcopy_qemufile_dst != NULL; > - } > - > return true; > } > > @@ -1482,6 +1486,8 @@ static void migrate_fd_cleanup(MigrationState *s) > > assert(!migration_is_active()); > > + file_cleanup_outgoing_migration(); > + socket_cleanup_outgoing_migration(); > if (s->state == MIGRATION_STATUS_CANCELLING) { > migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, > MIGRATION_STATUS_CANCELLED); > @@ -3373,8 +3379,11 @@ static MigIterateState > migration_iteration_run(MigrationState *s) > } > > /* Still a significant amount to transfer */ > - if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover > && > - qatomic_read(&s->start_postcopy)) { > + if (!in_postcopy && must_precopy <= s->threshold_size > + && can_switchover && qatomic_read(&s->start_postcopy)) { > + if (migrate_multifd()) { > + multifd_send_shutdown(); > + } > if (postcopy_start(s, &local_err)) { > migrate_set_error(s, local_err); > error_report_err(local_err); > diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c > index 1325dba97c..d0edec7cd1 100644 > --- a/migration/multifd-nocomp.c > +++ b/migration/multifd-nocomp.c > @@ -16,6 +16,7 @@ > #include "file.h" > #include "multifd.h" > #include "options.h" > +#include "migration.h" > #include "qapi/error.h" > #include "qemu/cutils.h" > #include "qemu/error-report.h" > @@ -391,7 +392,7 @@ int multifd_ram_flush_and_sync(QEMUFile *f) > MultiFDSyncReq req; > int ret; > > - if (!migrate_multifd()) { > + if (!migrate_multifd() || migration_in_postcopy()) { > return 0; > } > > diff --git a/migration/multifd.c b/migration/multifd.c > index 97ce831775..fa83a43778 100644 > --- a/migration/multifd.c > +++ b/migration/multifd.c > @@ -467,8 +467,6 @@ static bool > multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) > > static void multifd_send_cleanup_state(void) > { > - file_cleanup_outgoing_migration(); > - socket_cleanup_outgoing_migration(); > qemu_sem_destroy(&multifd_send_state->channels_created); > qemu_sem_destroy(&multifd_send_state->channels_ready); > g_free(multifd_send_state->params); > @@ -481,7 +479,7 @@ void multifd_send_shutdown(void) > { > int i; > > - if (!migrate_multifd()) { > + if (!migrate_multifd() || !multifd_send_state) { > return; > } > > diff --git a/migration/options.c b/migration/options.c > index 4db340b502..c4dfe89edd 100644 > --- a/migration/options.c > +++ b/migration/options.c > @@ -480,11 +480,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, > Error **errp) > error_setg(errp, "Postcopy is not compatible with > ignore-shared"); > return false; > } > - > - if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { > - error_setg(errp, "Postcopy is not yet compatible with multifd"); > - return false; > - } > } > > if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { > diff --git a/migration/ram.c b/migration/ram.c > index 6f460fd22d..8f22745aba 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -1297,7 +1297,7 @@ static int find_dirty_block(RAMState *rs, > PageSearchStatus *pss) > pss->page = 0; > pss->block = QLIST_NEXT_RCU(pss->block, next); > if (!pss->block) { > - if (multifd_ram_sync_per_round()) { > + if (multifd_ram_sync_per_round() && !migration_in_postcopy()) { > QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; > int ret = multifd_ram_flush_and_sync(f); > if (ret < 0) { > @@ -1971,9 +1971,8 @@ static int ram_save_target_page(RAMState *rs, > PageSearchStatus *pss) > } > } > > - if (migrate_multifd()) { > - RAMBlock *block = pss->block; > - return ram_save_multifd_page(block, offset); > + if (migrate_multifd() && !migration_in_postcopy()) { > + return ram_save_multifd_page(pss->block, offset); > } > > if (control_save_page(pss, offset, &res)) { > -- > 2.48.1 > >