On Fri, Feb 07, 2025 at 11:27:58AM -0300, Fabiano Rosas wrote:
> We're currently changing the way the source multifd migration handles
> the shutdown of the multifd channels when TLS is in use to perform a
> clean termination by calling gnutls_bye().
> 
> Older src QEMUs will always close the channel without terminating the
> TLS session. New dst QEMUs treat an unclean termination as an
> error. Due to synchronization conditions, src QEMUs 9.1 and 9.2 are an
> exception and can put the destination in a condition where it ignores
> the unclean termination. For src QEMUs older than 9.1, we'll need a
> compat property on the destination to inform that the src does not
> terminate the TLS session.
> 
> Add multifd_clean_tls_termination (default true) that can be switched
> on the destination whenever a src QEMU <9.1 is in use.

Patch looks good.  Though did you forget to add the compat entry?

I suggest we add it for all pre-9.2, in case whoever backports the recent
changes so it re-exposes again in any distro stables.

IMHO it doesn't hurt us much to be always cautious on 9.1 and 9.2 too by
loosing the termination a bit.

> 
> Signed-off-by: Fabiano Rosas <faro...@suse.de>
> ---
>  migration/migration.h | 33 +++++++++++++++++++++++++++++++++
>  migration/multifd.c   |  8 +++++++-
>  migration/multifd.h   |  2 ++
>  migration/options.c   |  2 ++
>  4 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/migration.h b/migration/migration.h
> index 4c1fafc2b5..77def0b437 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -443,6 +443,39 @@ struct MigrationState {
>       * Default value is false. (since 8.1)
>       */
>      bool multifd_flush_after_each_section;
> +
> +    /*
> +     * This variable only makes sense when set on the machine that is
> +     * the destination of a multifd migration with TLS enabled. It
> +     * affects the behavior of the last send->recv iteration with
> +     * regards to termination of the TLS session.
> +     *
> +     * When set:
> +     *
> +     * - the destination QEMU instance can expect to never get a
> +     *   GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error
> +     *   message: "The TLS connection was non-properly terminated".
> +     *
> +     * When clear:
> +     *
> +     * - the destination QEMU instance can expect to see a
> +     *   GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel
> +     *   whenever the last recv() call of that channel happens after
> +     *   the source QEMU instance has already issued shutdown() on the
> +     *   channel.
> +     *
> +     *   Commit 637280aeb2 (since 9.1) introduced a side effect that
> +     *   causes the destination instance to not be affected by the
> +     *   premature termination, while commit 1d457daf86 (since 10.0)
> +     *   causes the premature termination condition to be once again
> +     *   reachable.
> +     *
> +     * NOTE: Regardless of the state of this option, a premature
> +     * termination of the TLS connection might happen due to error at
> +     * any moment prior to the last send->recv iteration.
> +     */
> +    bool multifd_clean_tls_termination;
> +
>      /*
>       * This decides the size of guest memory chunk that will be used
>       * to track dirty bitmap clearing.  The size of memory chunk will
> diff --git a/migration/multifd.c b/migration/multifd.c
> index b4f82b0893..4342399818 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -1147,6 +1147,7 @@ void multifd_recv_sync_main(void)
>  
>  static void *multifd_recv_thread(void *opaque)
>  {
> +    MigrationState *s = migrate_get_current();
>      MultiFDRecvParams *p = opaque;
>      Error *local_err = NULL;
>      bool use_packets = multifd_use_packets();
> @@ -1155,6 +1156,10 @@ static void *multifd_recv_thread(void *opaque)
>      trace_multifd_recv_thread_start(p->id);
>      rcu_register_thread();
>  
> +    if (!s->multifd_clean_tls_termination) {
> +        p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF;
> +    }
> +
>      while (true) {
>          uint32_t flags = 0;
>          bool has_data = false;
> @@ -1166,7 +1171,8 @@ static void *multifd_recv_thread(void *opaque)
>              }
>  
>              ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
> -                                           p->packet_len, 0, &local_err);
> +                                           p->packet_len, p->read_flags,
> +                                           &local_err);
>              if (!ret) {
>                  /* EOF */
>                  assert(!local_err);
> diff --git a/migration/multifd.h b/migration/multifd.h
> index bd785b9873..cf408ff721 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -244,6 +244,8 @@ typedef struct {
>      uint32_t zero_num;
>      /* used for de-compression methods */
>      void *compress_data;
> +    /* Flags for the QIOChannel */
> +    int read_flags;
>  } MultiFDRecvParams;
>  
>  typedef struct {
> diff --git a/migration/options.c b/migration/options.c
> index 1ad950e397..feda354935 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -99,6 +99,8 @@ const Property migration_properties[] = {
>                        clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
>      DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
>                       preempt_pre_7_2, false),
> +    DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState,
> +                     multifd_clean_tls_termination, true),
>  
>      /* Migration parameters */
>      DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
> -- 
> 2.35.3
> 

-- 
Peter Xu


Reply via email to