Peter Xu <pet...@redhat.com> writes:

> On Tue, Feb 11, 2025 at 12:01:35PM -0300, Fabiano Rosas wrote:
>> The qmp_migrate_cancel() command is poorly tested and code inspection
>> reveals that there might be concurrency issues with its usage. Add a
>> test that runs a migration and calls qmp_migrate_cancel() at specific
>> moments.
>> 
>> In order to make the test more deterministic, instead of calling
>> qmp_migrate_cancel() at random moments during migration, do it after
>> the migration status change events are seen.
>> 
>> The expected result is that qmp_migrate_cancel() on the source ends
>> migration on the source with the "cancelled" state and ends migration
>> on the destination with the "failed" state. The only exception is that
>> a failed migration should continue in the failed state.
>> 
>> Cancelling is not allowed during postcopy (no test is added for this
>> because it's a trivial check in the code).
>> 
>> Signed-off-by: Fabiano Rosas <faro...@suse.de>
>> ---
>>  tests/qtest/migration/precopy-tests.c | 176 ++++++++++++++++++++++++++
>>  1 file changed, 176 insertions(+)
>> 
>> diff --git a/tests/qtest/migration/precopy-tests.c 
>> b/tests/qtest/migration/precopy-tests.c
>> index 162fa69531..ba273d10b9 100644
>> --- a/tests/qtest/migration/precopy-tests.c
>> +++ b/tests/qtest/migration/precopy-tests.c
>> @@ -20,6 +20,7 @@
>>  #include "migration/migration-util.h"
>>  #include "ppc-util.h"
>>  #include "qobject/qlist.h"
>> +#include "qapi-types-migration.h"
>>  #include "qemu/module.h"
>>  #include "qemu/option.h"
>>  #include "qemu/range.h"
>> @@ -536,6 +537,161 @@ static void test_multifd_tcp_cancel(void)
>>      migrate_end(from, to2, true);
>>  }
>>  
>> +static void test_cancel_src_after_failed(QTestState *from, QTestState *to,
>> +                                         const char *uri, const char *phase)
>> +{
>> +    /*
>> +     * No migrate_incoming_qmp() at the start to force source into
>> +     * failed state during migrate_qmp().
>> +     */
>> +
>> +    wait_for_serial("src_serial");
>> +    migrate_ensure_converge(from);
>> +
>> +    migrate_qmp(from, to, uri, NULL, "{}");
>> +
>> +    migration_event_wait(from, phase);
>> +    migrate_cancel(from);
>> +
>> +    /* cancelling will not move the migration out of 'failed' */
>> +
>> +    wait_for_migration_status(from, "failed",
>> +                              (const char * []) { "completed", NULL });
>> +
>> +    /*
>> +     * Not waiting for the destination because it never started
>> +     * migration.
>> +     */
>> +}
>> +
>> +static void test_cancel_src_after_cancelled(QTestState *from, QTestState 
>> *to,
>> +                                            const char *uri, const char 
>> *phase)
>> +{
>> +    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
>> +
>> +    wait_for_serial("src_serial");
>> +    migrate_ensure_converge(from);
>> +
>> +    migrate_qmp(from, to, uri, NULL, "{}");
>> +
>> +    /* To move to cancelled/cancelling */
>> +    migrate_cancel(from);
>> +    migration_event_wait(from, phase);
>> +
>> +    /* The migrate_cancel under test */
>> +    migrate_cancel(from);
>> +
>> +    wait_for_migration_status(from, "cancelled",
>> +                              (const char * []) { "completed", NULL });
>> +
>> +    wait_for_migration_status(to, "failed",
>> +                              (const char * []) { "completed", NULL });
>> +}
>> +
>> +static void test_cancel_src_after_complete(QTestState *from, QTestState *to,
>> +                                           const char *uri, const char 
>> *phase)
>> +{
>> +    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
>> +
>> +    wait_for_serial("src_serial");
>> +    migrate_ensure_converge(from);
>> +
>> +    migrate_qmp(from, to, uri, NULL, "{}");
>> +
>> +    migration_event_wait(from, phase);
>> +    migrate_cancel(from);
>> +
>> +    /*
>> +     * qmp_migrate_cancel() exits early if migration is not running
>> +     * anymore, the status will not change to cancelled.
>> +     */
>> +    wait_for_migration_complete(from);
>> +    wait_for_migration_complete(to);
>> +}
>> +
>> +static void test_cancel_src_after_none(QTestState *from, QTestState *to,
>> +                                       const char *uri, const char *phase)
>> +{
>> +    /*
>> +     * Test that cancelling without a migration happening does not
>> +     * affect subsequent migrations
>> +     */
>> +    migrate_cancel(to);
>> +
>> +    wait_for_serial("src_serial");
>> +    migrate_cancel(from);
>> +
>> +    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
>> +
>> +    migrate_ensure_converge(from);
>> +    migrate_qmp(from, to, uri, NULL, "{}");
>> +
>> +    wait_for_migration_complete(from);
>> +    wait_for_migration_complete(to);
>> +}
>> +
>> +static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to,
>> +                                           const char *uri, const char 
>> *phase)
>> +{
>> +    migrate_set_capability(from, "pause-before-switchover", true);
>> +    migrate_set_capability(to, "pause-before-switchover", true);
>> +
>> +    migrate_set_capability(from, "multifd", true);
>> +    migrate_set_capability(to, "multifd", true);
>> +
>> +    migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }");
>> +
>> +    wait_for_serial("src_serial");
>> +    migrate_ensure_converge(from);
>> +
>> +    migrate_qmp(from, to, uri, NULL, "{}");
>> +
>> +    migration_event_wait(from, phase);
>> +    migrate_cancel(from);
>> +    migration_event_wait(from, "cancelling");
>> +
>> +    wait_for_migration_status(from, "cancelled",
>> +                              (const char * []) { "completed", NULL });
>> +
>> +    wait_for_migration_status(to, "failed",
>> +                              (const char * []) { "completed", NULL });
>> +}
>> +
>> +static void test_cancel_src_after_status(void *opaque)
>> +{
>> +    const char *test_path = opaque;
>> +    g_autofree char *phase = g_path_get_basename(test_path);
>> +    g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
>> +    QTestState *from, *to;
>> +    MigrateStart args = {
>> +        .hide_stderr = true,
>> +    };
>> +
>> +    if (migrate_start(&from, &to, "defer", &args)) {
>> +        return;
>> +    }
>> +
>> +    if (g_str_equal(phase, "cancelling") ||
>> +        g_str_equal(phase, "cancelled")) {
>> +        test_cancel_src_after_cancelled(from, to, uri, phase);
>> +
>> +    } else if (g_str_equal(phase, "completed")) {
>> +        test_cancel_src_after_complete(from, to, uri, phase);
>> +
>> +    } else if (g_str_equal(phase, "failed")) {
>> +        test_cancel_src_after_failed(from, to, uri, phase);
>> +
>> +    } else if (g_str_equal(phase, "none")) {
>> +        test_cancel_src_after_none(from, to, uri, phase);
>> +
>> +    } else {
>> +        /* any state that comes before pre-switchover */
>> +        test_cancel_src_pre_switchover(from, to, uri, phase);
>
> [1]
>
>> +    }
>> +
>> +    migrate_end(from, to, false);
>> +}
>
> I'm OK with the patch as it stands, since it at least enlarges our cancel
> test cases, so it's definitely good to have:
>
> Reviewed-by: Peter Xu <pet...@redhat.com>
>
> Though one thing to mention is that the new "test_full()" abstraction doesn't
> yet look like it benefits us a whole lot, IMHO.
>
> These are the new tests:
>
> # /x86_64/migration/cancel/src/after/none
> # /x86_64/migration/cancel/src/after/setup             [*]
> # /x86_64/migration/cancel/src/after/cancelling
> # /x86_64/migration/cancel/src/after/cancelled
> # /x86_64/migration/cancel/src/after/active
> # /x86_64/migration/cancel/src/after/completed
> # /x86_64/migration/cancel/src/after/failed
> # /x86_64/migration/cancel/src/after/pre-switchover    [*]
>
> We have only one abstracted path [1] to test an arbitrary status, but so far
> it only covers the two cases marked with [*].  It is hard to say whether the
> abstraction is necessary, or whether it would be easier to always register
> separate test cases.  So it's still slightly debatable whether we could turn
> the above "if .. else if .. else if ... else" chain into separate tests.
>

It gets super boilerplatey:


    for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) {
        switch (i) {
        case MIGRATION_STATUS_DEVICE:          /* happens too fast */
        case MIGRATION_STATUS_WAIT_UNPLUG:     /* no support in tests */
        case MIGRATION_STATUS_COLO:            /* no support in tests */
        case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */
        case MIGRATION_STATUS_POSTCOPY_PAUSED:
        case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
        case MIGRATION_STATUS_POSTCOPY_RECOVER:
            continue;
        case MIGRATION_STATUS_NONE:
            migration_test_add("/migration/cancel/src/after/none",
                               test_cancel_src_after_none);
            break;
        case MIGRATION_STATUS_SETUP:
            migration_test_add("/migration/cancel/src/after/setup",
                               test_cancel_src_after_setup);
            break;
        case MIGRATION_STATUS_CANCELLING:
            migration_test_add("/migration/cancel/src/after/cancelling",
                               test_cancel_src_after_cancelling);
            break;
        case MIGRATION_STATUS_CANCELLED:
            migration_test_add("/migration/cancel/src/after/cancelled",
                               test_cancel_src_after_cancelled);
            break;
        case MIGRATION_STATUS_ACTIVE:
            migration_test_add("/migration/cancel/src/after/active",
                               test_cancel_src_after_active);
            break;
        case MIGRATION_STATUS_COMPLETED:
            migration_test_add("/migration/cancel/src/after/completed",
                               test_cancel_src_after_completed);
            break;
        case MIGRATION_STATUS_FAILED:
            migration_test_add("/migration/cancel/src/after/failed",
                               test_cancel_src_after_failed);
            break;
        case MIGRATION_STATUS_PRE_SWITCHOVER:
            migration_test_add("/migration/cancel/src/after/pre-switchover",
                               test_cancel_src_after_pre_switchover);
            break;
        }
    }

}

void test_cancel_src_after_cancelling(void)
{
    test_cancel_src_after_cancel("cancelling");
}

void test_cancel_src_after_cancelled(void)
{
    test_cancel_src_after_cancel("cancelled");
}

void test_cancel_src_after_setup(void)
{
    test_cancel_src_after("setup");
}

void test_cancel_src_after_active(void)
{
    test_cancel_src_after("active");
}

void test_cancel_src_after_pre_switchover(void)
{
    test_cancel_src_after("pre-switchover");
}

static void test_cancel_src_after_failed(void)
{
    ...
    migration_event_wait(from, "failed");
    ...
}

static void test_cancel_src_after_cancel(const char *phase)
{
    ...    
    migration_event_wait(from, phase);
    ...
}

static void test_cancel_src_after_complete(void)
{
    migration_event_wait(from, "completed");
    ...
}

static void test_cancel_src_after_none(void)
{
    ...
}

static void test_cancel_src_after(const char *phase)
{
   ...
   migration_event_wait(from, phase);
   ...
}

Reply via email to