Peter Xu <pet...@redhat.com> writes:

> On Fri, Feb 07, 2025 at 11:27:58AM -0300, Fabiano Rosas wrote:
>> We're currently changing the way the source multifd migration handles
>> the shutdown of the multifd channels when TLS is in use to perform a
>> clean termination by calling gnutls_bye().
>> 
>> Older src QEMUs will always close the channel without terminating the
>> TLS session. New dst QEMUs treat an unclean termination as an
>> error. Due to synchronization conditions, src QEMUs 9.1 and 9.2 are an
>> exception and can put the destination in a condition where it ignores
>> the unclean termination. For src QEMUs older than 9.1, we'll need a
>> compat property on the destination to inform that the src does not
>> terminate the TLS session.
>> 
>> Add multifd_clean_tls_termination (default true) that can be switched off
>> on the destination whenever a src QEMU <9.1 is in use.
>
> Patch looks good.  Though did you forget to add the compat entry?
>

Indeed.

> I suggest we add it for all machine types up to and including 9.2, in case
> someone backports the recent changes and re-exposes the issue in a distro
> stable branch.
>
> IMHO it doesn't hurt us much to be always cautious on 9.1 and 9.2 too by
> loosening the termination check a bit.
>

Ok, I'll put it in hw_compat_9_2.

>> 
>> Signed-off-by: Fabiano Rosas <faro...@suse.de>
>> ---
>>  migration/migration.h | 33 +++++++++++++++++++++++++++++++++
>>  migration/multifd.c   |  8 +++++++-
>>  migration/multifd.h   |  2 ++
>>  migration/options.c   |  2 ++
>>  4 files changed, 44 insertions(+), 1 deletion(-)
>> 
>> diff --git a/migration/migration.h b/migration/migration.h
>> index 4c1fafc2b5..77def0b437 100644
>> --- a/migration/migration.h
>> +++ b/migration/migration.h
>> @@ -443,6 +443,39 @@ struct MigrationState {
>>       * Default value is false. (since 8.1)
>>       */
>>      bool multifd_flush_after_each_section;
>> +
>> +    /*
>> +     * This variable only makes sense when set on the machine that is
>> +     * the destination of a multifd migration with TLS enabled. It
>> +     * affects the behavior of the last send->recv iteration with
>> +     * regard to termination of the TLS session.
>> +     *
>> +     * When set:
>> +     *
>> +     * - the destination QEMU instance can expect to never get a
>> +     *   GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error
>> +     *   message: "The TLS connection was non-properly terminated".
>> +     *
>> +     * When clear:
>> +     *
>> +     * - the destination QEMU instance can expect to see a
>> +     *   GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel
>> +     *   whenever the last recv() call of that channel happens after
>> +     *   the source QEMU instance has already issued shutdown() on the
>> +     *   channel.
>> +     *
>> +     *   Commit 637280aeb2 (since 9.1) introduced a side effect that
>> +     *   causes the destination instance to not be affected by the
>> +     *   premature termination, while commit 1d457daf86 (since 10.0)
>> +     *   causes the premature termination condition to be once again
>> +     *   reachable.
>> +     *
>> +     * NOTE: Regardless of the state of this option, a premature
>> +     * termination of the TLS connection might happen due to an error at
>> +     * any moment prior to the last send->recv iteration.
>> +     */
>> +    bool multifd_clean_tls_termination;
>> +
>>      /*
>>       * This decides the size of guest memory chunk that will be used
>>       * to track dirty bitmap clearing.  The size of memory chunk will
>> diff --git a/migration/multifd.c b/migration/multifd.c
>> index b4f82b0893..4342399818 100644
>> --- a/migration/multifd.c
>> +++ b/migration/multifd.c
>> @@ -1147,6 +1147,7 @@ void multifd_recv_sync_main(void)
>>  
>>  static void *multifd_recv_thread(void *opaque)
>>  {
>> +    MigrationState *s = migrate_get_current();
>>      MultiFDRecvParams *p = opaque;
>>      Error *local_err = NULL;
>>      bool use_packets = multifd_use_packets();
>> @@ -1155,6 +1156,10 @@ static void *multifd_recv_thread(void *opaque)
>>      trace_multifd_recv_thread_start(p->id);
>>      rcu_register_thread();
>>  
>> +    if (!s->multifd_clean_tls_termination) {
>> +        p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF;
>> +    }
>> +
>>      while (true) {
>>          uint32_t flags = 0;
>>          bool has_data = false;
>> @@ -1166,7 +1171,8 @@ static void *multifd_recv_thread(void *opaque)
>>              }
>>  
>>              ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
>> -                                           p->packet_len, 0, &local_err);
>> +                                           p->packet_len, p->read_flags,
>> +                                           &local_err);
>>              if (!ret) {
>>                  /* EOF */
>>                  assert(!local_err);
>> diff --git a/migration/multifd.h b/migration/multifd.h
>> index bd785b9873..cf408ff721 100644
>> --- a/migration/multifd.h
>> +++ b/migration/multifd.h
>> @@ -244,6 +244,8 @@ typedef struct {
>>      uint32_t zero_num;
>>      /* used for de-compression methods */
>>      void *compress_data;
>> +    /* Flags passed to the QIOChannel read calls on this channel */
>> +    int read_flags;
>>  } MultiFDRecvParams;
>>  
>>  typedef struct {
>> diff --git a/migration/options.c b/migration/options.c
>> index 1ad950e397..feda354935 100644
>> --- a/migration/options.c
>> +++ b/migration/options.c
>> @@ -99,6 +99,8 @@ const Property migration_properties[] = {
>>                        clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
>>      DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
>>                       preempt_pre_7_2, false),
>> +    DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState,
>> +                     multifd_clean_tls_termination, true),
>>  
>>      /* Migration parameters */
>>      DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
>> -- 
>> 2.35.3
>> 

Reply via email to