We're currently changing the way the source multifd migration handles the shutdown of the multifd channels when TLS is in use to perform a clean termination by calling gnutls_bye().
Older src QEMUs will always close the channel without terminating the TLS session. New dst QEMUs treat an unclean termination as an error. Add multifd_clean_tls_termination (default true) that can be switched on the destination whenever a src QEMU <= 9.2 is in use. (Note that the compat property is only strictly necessary for src QEMUs older than 9.1. Due to synchronization coincidences, src QEMUs 9.1 and 9.2 can put the destination in a condition where it doesn't see the unclean termination. Still, make the property more inclusive to facilitate potential backports.) Signed-off-by: Fabiano Rosas <faro...@suse.de> --- hw/core/machine.c | 1 + migration/migration.h | 33 +++++++++++++++++++++++++++++++++ migration/multifd.c | 15 +++++++++++++-- migration/multifd.h | 2 ++ migration/options.c | 2 ++ 5 files changed, 51 insertions(+), 2 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 254cc20c4c..02cff735b3 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -42,6 +42,7 @@ GlobalProperty hw_compat_9_2[] = { { "virtio-balloon-pci-transitional", "vectors", "0" }, { "virtio-balloon-pci-non-transitional", "vectors", "0" }, { "virtio-mem-pci", "vectors", "0" }, + { "migration", "multifd-clean-tls-termination", "false" }, }; const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2); diff --git a/migration/migration.h b/migration/migration.h index 4c1fafc2b5..77def0b437 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -443,6 +443,39 @@ struct MigrationState { * Default value is false. (since 8.1) */ bool multifd_flush_after_each_section; + + /* + * This variable only makes sense when set on the machine that is + * the destination of a multifd migration with TLS enabled. It + * affects the behavior of the last send->recv iteration with + * regards to termination of the TLS session. + * + * When set: + * + * - the destination QEMU instance can expect to never get a + * GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error + * message: "The TLS connection was non-properly terminated". + * + * When clear: + * + * - the destination QEMU instance can expect to see a + * GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel + * whenever the last recv() call of that channel happens after + * the source QEMU instance has already issued shutdown() on the + * channel. + * + * Commit 637280aeb2 (since 9.1) introduced a side effect that + * causes the destination instance to not be affected by the + * premature termination, while commit 1d457daf86 (since 10.0) + * causes the premature termination condition to be once again + * reachable. + * + * NOTE: Regardless of the state of this option, a premature + * termination of the TLS connection might happen due to error at + * any moment prior to the last send->recv iteration. + */ + bool multifd_clean_tls_termination; + /* * This decides the size of guest memory chunk that will be used * to track dirty bitmap clearing. The size of memory chunk will diff --git a/migration/multifd.c b/migration/multifd.c index 0296758c08..8045197be8 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -1151,6 +1151,7 @@ void multifd_recv_sync_main(void) static void *multifd_recv_thread(void *opaque) { + MigrationState *s = migrate_get_current(); MultiFDRecvParams *p = opaque; Error *local_err = NULL; bool use_packets = multifd_use_packets(); @@ -1159,18 +1160,28 @@ static void *multifd_recv_thread(void *opaque) trace_multifd_recv_thread_start(p->id); rcu_register_thread(); + if (!s->multifd_clean_tls_termination) { + p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF; + } + while (true) { uint32_t flags = 0; bool has_data = false; p->normal_num = 0; + if (use_packets) { + struct iovec iov = { + .iov_base = (void *)p->packet, + .iov_len = p->packet_len + }; + if (multifd_recv_should_exit()) { break; } - ret = qio_channel_read_all_eof(p->c, (void *)p->packet, - p->packet_len, &local_err); + ret = qio_channel_readv_full_all_eof(p->c, &iov, 1, NULL, NULL, + p->read_flags, &local_err); if (!ret) { /* EOF */ assert(!local_err); diff --git a/migration/multifd.h b/migration/multifd.h index bd785b9873..cf408ff721 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -244,6 +244,8 @@ typedef struct { uint32_t zero_num; /* used for de-compression methods */ void *compress_data; + /* Flags for the QIOChannel */ + int read_flags; } MultiFDRecvParams; typedef struct { diff --git a/migration/options.c b/migration/options.c index 1ad950e397..feda354935 100644 --- a/migration/options.c +++ b/migration/options.c @@ -99,6 +99,8 @@ const Property migration_properties[] = { clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, preempt_pre_7_2, false), + DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState, + multifd_clean_tls_termination, true), /* Migration parameters */ DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, -- 2.35.3