On Tue, Oct 08, 2019 at 05:40:46PM +0100, Dr. David Alan Gilbert wrote: >* Wei Yang (richardw.y...@linux.intel.com) wrote: >> Currently, we set PostcopyState blindly to RUNNING, even we found the >> previous state is not LISTENING. This will lead to a corner case. >> >> First let's look at the code flow: >> >> qemu_loadvm_state_main() >> ret = loadvm_process_command() >> loadvm_postcopy_handle_run() >> return -1; >> if (ret < 0) { >> if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING) >> ... >> } >> >> From above snippet, the corner case is loadvm_postcopy_handle_run() >> always sets state to RUNNING. And then it checks the previous state. If >> the previous state is not LISTENING, it will return -1. But at this >> moment, PostcopyState is already been set to RUNNING. >> >> Then ret is checked in qemu_loadvm_state_main(), when it is -1 >> PostcopyState is checked. Current logic would pause postcopy and retry >> if PostcopyState is RUNNING. This is not what we expect, because >> postcopy is not active yet. >> >> This patch makes sure state is set to RUNNING only previous state is >> LISTENING by introducing an old_state parameter in postcopy_state_set(). >> New state only would be set when current state equals to old_state. >> >> Signed-off-by: Wei Yang <richardw.y...@linux.intel.com> > >OK, it's a shame to use a pointer there, but it works.
You mean second parameter of postcopy_state_set()? I don't have a better idea. Or we introduce a new state POSTCOPY_INCOMING_NOCHECK. Do you feel better with this? >Note, something else; using '-1' as the return value and checking for it >is something we do a lot; but in this case it's an example of an error >we could never recover from so it never makes sense to try and recover. >We should probably look at different types of error. > > >Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > >Dave > >> --- >> migration/migration.c | 2 +- >> migration/postcopy-ram.c | 13 +++++++++---- >> migration/postcopy-ram.h | 3 ++- >> migration/savevm.c | 11 ++++++----- >> 4 files changed, 18 insertions(+), 11 deletions(-) >> >> diff --git a/migration/migration.c b/migration/migration.c >> index 34d5e66f06..369cf3826e 100644 >> --- a/migration/migration.c >> +++ b/migration/migration.c >> @@ -447,7 +447,7 @@ static void process_incoming_migration_co(void *opaque) >> assert(mis->from_src_file); >> mis->migration_incoming_co = qemu_coroutine_self(); >> mis->largest_page_size = qemu_ram_pagesize_largest(); >> - postcopy_state_set(POSTCOPY_INCOMING_NONE); >> + postcopy_state_set(POSTCOPY_INCOMING_NONE, NULL); >> migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, >> MIGRATION_STATUS_ACTIVE); >> ret = qemu_loadvm_state(mis->from_src_file); >> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c >> index b24c4a10c2..8f741d636d 100644 >> --- a/migration/postcopy-ram.c >> +++ b/migration/postcopy-ram.c >> @@ -577,7 +577,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState >> *mis) >> } >> } >> >> - postcopy_state_set(POSTCOPY_INCOMING_END); >> + postcopy_state_set(POSTCOPY_INCOMING_END, NULL); >> >> if (mis->postcopy_tmp_page) { >> munmap(mis->postcopy_tmp_page, mis->largest_page_size); >> @@ -626,7 +626,7 @@ int postcopy_ram_prepare_discard(MigrationIncomingState >> *mis) >> return -1; >> } >> >> - postcopy_state_set(POSTCOPY_INCOMING_DISCARD); >> + postcopy_state_set(POSTCOPY_INCOMING_DISCARD, NULL); >> >> return 0; >> } >> @@ -1457,9 +1457,14 @@ PostcopyState postcopy_state_get(void) >> } >> >> /* Set the state and return the old state */ >> -PostcopyState postcopy_state_set(PostcopyState new_state) >> +PostcopyState postcopy_state_set(PostcopyState new_state, >> + const PostcopyState *old_state) >> { >> - return atomic_xchg(&incoming_postcopy_state, new_state); >> + if (!old_state) { >> + return atomic_xchg(&incoming_postcopy_state, new_state); >> + } else { >> + return atomic_cmpxchg(&incoming_postcopy_state, *old_state, >> new_state); >> + } >> } >> >> /* Register a handler for external shared memory postcopy >> diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h >> index d2668cc820..e3dde32155 100644 >> --- a/migration/postcopy-ram.h >> +++ b/migration/postcopy-ram.h >> @@ -109,7 +109,8 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis); >> >> PostcopyState postcopy_state_get(void); >> /* Set the state and return the old state */ >> -PostcopyState postcopy_state_set(PostcopyState new_state); >> +PostcopyState postcopy_state_set(PostcopyState new_state, >> + const PostcopyState *old_state); >> >> void postcopy_fault_thread_notify(MigrationIncomingState *mis); >> >> diff --git a/migration/savevm.c b/migration/savevm.c >> index f3292eb003..45474d9c95 100644 >> --- a/migration/savevm.c >> +++ b/migration/savevm.c >> @@ -1599,7 +1599,7 @@ enum LoadVMExitCodes { >> static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, >> uint16_t len) >> { >> - PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); >> + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE, NULL); >> uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; >> Error *local_err = NULL; >> >> @@ -1628,7 +1628,7 @@ static int >> loadvm_postcopy_handle_advise(MigrationIncomingState *mis, >> } >> >> if (!postcopy_ram_supported_by_host(mis)) { >> - postcopy_state_set(POSTCOPY_INCOMING_NONE); >> + postcopy_state_set(POSTCOPY_INCOMING_NONE, NULL); >> return -1; >> } >> >> @@ -1841,7 +1841,7 @@ static void *postcopy_ram_listen_thread(void *opaque) >> /* After this message we must be able to immediately receive postcopy data >> */ >> static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis) >> { >> - PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING); >> + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING, >> NULL); >> trace_loadvm_postcopy_handle_listen(); >> Error *local_err = NULL; >> >> @@ -1929,10 +1929,11 @@ static void loadvm_postcopy_handle_run_bh(void >> *opaque) >> /* After all discards we can start running and asking for pages */ >> static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) >> { >> - PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING); >> + PostcopyState old_ps = POSTCOPY_INCOMING_LISTENING; >> + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_RUNNING, >> &old_ps); >> >> trace_loadvm_postcopy_handle_run(); >> - if (ps != POSTCOPY_INCOMING_LISTENING) { >> + if (ps != old_ps) { >> error_report("CMD_POSTCOPY_RUN in wrong postcopy state (%d)", ps); >> return -1; >> } >> -- >> 2.17.1 >> >-- >Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK -- Wei Yang Help you, Help me