Peter Xu <pet...@redhat.com> writes: > On Tue, Feb 11, 2025 at 12:01:35PM -0300, Fabiano Rosas wrote: >> The qmp_migrate_cancel() command is poorly tested and code inspection >> reveals that there might be concurrency issues with its usage. Add a >> test that runs a migration and calls qmp_migrate_cancel() at specific >> moments. >> >> In order to make the test more deterministic, instead of calling >> qmp_migrate_cancel() at random moments during migration, do it after >> the migration status change events are seen. >> >> The expected result is that qmp_migrate_cancel() on the source ends >> migration on the source with the "cancelled" state and ends migration >> on the destination with the "failed" state. The only exception is that >> a failed migration should continue in the failed state. >> >> Cancelling is not allowed during postcopy (no test is added for this >> because it's a trivial check in the code). >> >> Signed-off-by: Fabiano Rosas <faro...@suse.de> >> --- >> tests/qtest/migration/precopy-tests.c | 176 ++++++++++++++++++++++++++ >> 1 file changed, 176 insertions(+) >> >> diff --git a/tests/qtest/migration/precopy-tests.c >> b/tests/qtest/migration/precopy-tests.c >> index 162fa69531..ba273d10b9 100644 >> --- a/tests/qtest/migration/precopy-tests.c >> +++ b/tests/qtest/migration/precopy-tests.c >> @@ -20,6 +20,7 @@ >> #include "migration/migration-util.h" >> #include "ppc-util.h" >> #include "qobject/qlist.h" >> +#include "qapi-types-migration.h" >> #include "qemu/module.h" >> #include "qemu/option.h" >> #include "qemu/range.h" >> @@ -536,6 +537,161 @@ static void test_multifd_tcp_cancel(void) >> migrate_end(from, to2, true); >> } >> >> +static void test_cancel_src_after_failed(QTestState *from, QTestState *to, >> + const char *uri, const char *phase) >> +{ >> + /* >> + * No migrate_incoming_qmp() at the start to force source into >> + * failed state during migrate_qmp(). 
>> + */ >> + >> + wait_for_serial("src_serial"); >> + migrate_ensure_converge(from); >> + >> + migrate_qmp(from, to, uri, NULL, "{}"); >> + >> + migration_event_wait(from, phase); >> + migrate_cancel(from); >> + >> + /* cancelling will not move the migration out of 'failed' */ >> + >> + wait_for_migration_status(from, "failed", >> + (const char * []) { "completed", NULL }); >> + >> + /* >> + * Not waiting for the destination because it never started >> + * migration. >> + */ >> +} >> + >> +static void test_cancel_src_after_cancelled(QTestState *from, QTestState >> *to, >> + const char *uri, const char >> *phase) >> +{ >> + migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); >> + >> + wait_for_serial("src_serial"); >> + migrate_ensure_converge(from); >> + >> + migrate_qmp(from, to, uri, NULL, "{}"); >> + >> + /* To move to cancelled/cancelling */ >> + migrate_cancel(from); >> + migration_event_wait(from, phase); >> + >> + /* The migrate_cancel under test */ >> + migrate_cancel(from); >> + >> + wait_for_migration_status(from, "cancelled", >> + (const char * []) { "completed", NULL }); >> + >> + wait_for_migration_status(to, "failed", >> + (const char * []) { "completed", NULL }); >> +} >> + >> +static void test_cancel_src_after_complete(QTestState *from, QTestState *to, >> + const char *uri, const char >> *phase) >> +{ >> + migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); >> + >> + wait_for_serial("src_serial"); >> + migrate_ensure_converge(from); >> + >> + migrate_qmp(from, to, uri, NULL, "{}"); >> + >> + migration_event_wait(from, phase); >> + migrate_cancel(from); >> + >> + /* >> + * qmp_migrate_cancel() exits early if migration is not running >> + * anymore, the status will not change to cancelled. 
>> + */ >> + wait_for_migration_complete(from); >> + wait_for_migration_complete(to); >> +} >> + >> +static void test_cancel_src_after_none(QTestState *from, QTestState *to, >> + const char *uri, const char *phase) >> +{ >> + /* >> + * Test that cancelling without a migration happening does not >> + * affect subsequent migrations >> + */ >> + migrate_cancel(to); >> + >> + wait_for_serial("src_serial"); >> + migrate_cancel(from); >> + >> + migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); >> + >> + migrate_ensure_converge(from); >> + migrate_qmp(from, to, uri, NULL, "{}"); >> + >> + wait_for_migration_complete(from); >> + wait_for_migration_complete(to); >> +} >> + >> +static void test_cancel_src_pre_switchover(QTestState *from, QTestState *to, >> + const char *uri, const char >> *phase) >> +{ >> + migrate_set_capability(from, "pause-before-switchover", true); >> + migrate_set_capability(to, "pause-before-switchover", true); >> + >> + migrate_set_capability(from, "multifd", true); >> + migrate_set_capability(to, "multifd", true); >> + >> + migrate_incoming_qmp(to, uri, NULL, "{ 'exit-on-error': false }"); >> + >> + wait_for_serial("src_serial"); >> + migrate_ensure_converge(from); >> + >> + migrate_qmp(from, to, uri, NULL, "{}"); >> + >> + migration_event_wait(from, phase); >> + migrate_cancel(from); >> + migration_event_wait(from, "cancelling"); >> + >> + wait_for_migration_status(from, "cancelled", >> + (const char * []) { "completed", NULL }); >> + >> + wait_for_migration_status(to, "failed", >> + (const char * []) { "completed", NULL }); >> +} >> + >> +static void test_cancel_src_after_status(void *opaque) >> +{ >> + const char *test_path = opaque; >> + g_autofree char *phase = g_path_get_basename(test_path); >> + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); >> + QTestState *from, *to; >> + MigrateStart args = { >> + .hide_stderr = true, >> + }; >> + >> + if (migrate_start(&from, &to, "defer", &args)) { >> + return; >> + } 
>> + >> + if (g_str_equal(phase, "cancelling") || >> + g_str_equal(phase, "cancelled")) { >> + test_cancel_src_after_cancelled(from, to, uri, phase); >> + >> + } else if (g_str_equal(phase, "completed")) { >> + test_cancel_src_after_complete(from, to, uri, phase); >> + >> + } else if (g_str_equal(phase, "failed")) { >> + test_cancel_src_after_failed(from, to, uri, phase); >> + >> + } else if (g_str_equal(phase, "none")) { >> + test_cancel_src_after_none(from, to, uri, phase); >> + >> + } else { >> + /* any state that comes before pre-switchover */ >> + test_cancel_src_pre_switchover(from, to, uri, phase); > > [1] > >> + } >> + >> + migrate_end(from, to, false); >> +} > > I'm OK with the current status, considering it at least enlarges our cancel > test cases, so it is definitely good to have: > > Reviewed-by: Peter Xu <pet...@redhat.com> > > Though one thing to mention is that the new "test_full()" abstraction doesn't > yet look like it benefits us a great deal, IMHO. > > These are the new tests: > > # /x86_64/migration/cancel/src/after/none > # /x86_64/migration/cancel/src/after/setup [*] > # /x86_64/migration/cancel/src/after/cancelling > # /x86_64/migration/cancel/src/after/cancelled > # /x86_64/migration/cancel/src/after/active > # /x86_64/migration/cancel/src/after/completed > # /x86_64/migration/cancel/src/after/failed > # /x86_64/migration/cancel/src/after/pre-switchover [*] > > We have only one abstracted path [1] to test random status, but that so far > only covers the two cases marked with [*]. It is hard to say whether the > abstraction is necessary, or whether it would be easier to always register separate > test cases. So it's still slightly debatable whether we should turn all of the > above "if .. if else .. if else ... else" into separate tests. >
It gets super boilerplatey: for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) { switch (i) { case MIGRATION_STATUS_DEVICE: /* happens too fast */ case MIGRATION_STATUS_WAIT_UNPLUG: /* no support in tests */ case MIGRATION_STATUS_COLO: /* no support in tests */ case MIGRATION_STATUS_POSTCOPY_ACTIVE: /* postcopy can't be cancelled */ case MIGRATION_STATUS_POSTCOPY_PAUSED: case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP: case MIGRATION_STATUS_POSTCOPY_RECOVER: continue; case MIGRATION_STATUS_NONE: migration_test_add("/migration/cancel/src/after/none", test_cancel_src_after_none); break; case MIGRATION_STATUS_SETUP: migration_test_add("/migration/cancel/src/after/setup", test_cancel_src_after_setup); break; case MIGRATION_STATUS_CANCELLING: migration_test_add("/migration/cancel/src/after/cancelling", test_cancel_src_after_cancelling); break; case MIGRATION_STATUS_CANCELLED: migration_test_add("/migration/cancel/src/after/cancelled", test_cancel_src_after_cancelled); break; case MIGRATION_STATUS_ACTIVE: migration_test_add("/migration/cancel/src/after/active", test_cancel_src_after_active); break; case MIGRATION_STATUS_COMPLETED: migration_test_add("/migration/cancel/src/after/completed", test_cancel_src_after_completed); break; case MIGRATION_STATUS_FAILED: migration_test_add("/migration/cancel/src/after/failed", test_cancel_src_after_failed); break; case MIGRATION_STATUS_PRE_SWITCHOVER: migration_test_add("/migration/cancel/src/after/pre-switchover", test_cancel_src_after_pre_switchover); break; } } } void test_cancel_src_after_cancelling(void) { test_cancel_src_after_cancel("cancelling"); } void test_cancel_src_after_cancelled(void) { test_cancel_src_after_cancel("cancelled"); } void test_cancel_src_after_setup(void) { test_cancel_src_after("setup"); } void test_cancel_src_after_active(void) { test_cancel_src_after("active"); } void test_cancel_src_after_pre_switchover(void) { test_cancel_src_after("pre-switchover"); } static void 
test_cancel_src_after_failed(void) { ... migration_event_wait(from, "failed"); ... } static void test_cancel_src_after_cancel(const char *phase) { ... migration_event_wait(from, phase); ... } static void test_cancel_src_after_completed(void) { migration_event_wait(from, "completed"); ... } static void test_cancel_src_after_none(void) { ... } static void test_cancel_src_after(const char *phase) { ... migration_event_wait(from, phase); ... }