* zhanghailiang (zhang.zhanghaili...@huawei.com) wrote: > If users require SVM to takeover work, colo incoming thread should > exit from loop while failover BH helps backing to migration incoming > coroutine. > > Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com> > Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com> > --- > migration/colo.c | 42 +++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 39 insertions(+), 3 deletions(-) > > diff --git a/migration/colo.c b/migration/colo.c > index 7a42fc6..f31e957 100644 > --- a/migration/colo.c > +++ b/migration/colo.c > @@ -46,6 +46,33 @@ static bool colo_runstate_is_stopped(void) > return runstate_check(RUN_STATE_COLO) || !runstate_is_running(); > } > > +static void secondary_vm_do_failover(void) > +{ > + int old_state; > + MigrationIncomingState *mis = migration_incoming_get_current(); > + > + migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, > + MIGRATION_STATUS_COMPLETED); > + > + if (!autostart) { > + error_report("\"-S\" qemu option will be ignored in secondary side"); > + /* recover runstate to normal migration finish state */ > + autostart = true; > + }
You might find libvirt will need something different for it to be involved during the failover; but for now OK. > + old_state = failover_set_state(FAILOVER_STATUS_HANDLING, > + FAILOVER_STATUS_COMPLETED); > + if (old_state != FAILOVER_STATUS_HANDLING) { > + error_report("Serious error while do failover for secondary VM," > + "old_state: %d", old_state); Same suggestion as previous patch just to improve the error message. > + return; > + } > + /* For Secondary VM, jump to incoming co */ > + if (mis->migration_incoming_co) { > + qemu_coroutine_enter(mis->migration_incoming_co, NULL); > + } > +} > + > static void primary_vm_do_failover(void) > { > MigrationState *s = migrate_get_current(); > @@ -74,6 +101,8 @@ void colo_do_failover(MigrationState *s) > > if (get_colo_mode() == COLO_MODE_PRIMARY) { > primary_vm_do_failover(); > + } else { > + secondary_vm_do_failover(); > } > } > > @@ -404,6 +433,12 @@ void *colo_process_incoming_thread(void *opaque) > continue; > } > } > + > + if (failover_request_is_active()) { > + error_report("failover request"); > + goto out; > + } > + > /* FIXME: This is unnecessary for periodic checkpoint mode */ > ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_CHECKPOINT_REPLY, > 0); > if (ret < 0) { > @@ -473,10 +508,11 @@ out: > qemu_fclose(fb); > } > qsb_free(buffer); > - > - qemu_mutex_lock_iothread(); > + /* Here, we can ensure BH is hold the global lock, and will join colo > + * incoming thread, so here it is not necessary to lock here again, > + * or there will be a deadlock error. > + */ > colo_release_ram_cache(); > - qemu_mutex_unlock_iothread(); OK, I think I understand that - because we know there is a failover request active, then it must be holding the lock? Other than the error message improvement: Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Dave > > if (mis->to_src_file) { > qemu_fclose(mis->to_src_file); > -- > 1.8.3.1 > > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK