Make sure the master starts block replication after the slave's block replication has started.
Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com> Signed-off-by: Wen Congyang <we...@cn.fujitsu.com> Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com> --- migration/colo.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ trace-events | 2 ++ 2 files changed, 54 insertions(+) diff --git a/migration/colo.c b/migration/colo.c index 23e1131..c08a3ff 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -20,6 +20,7 @@ #include "qapi-event.h" #include "net/filter.h" #include "net/net.h" +#include "block/block_int.h" static bool vmstate_loading; @@ -54,6 +55,7 @@ static void secondary_vm_do_failover(void) { int old_state; MigrationIncomingState *mis = migration_incoming_get_current(); + Error *local_err = NULL; /* Can not do failover during the process of VM's loading VMstate, Or * it will break the secondary VM. @@ -71,6 +73,12 @@ static void secondary_vm_do_failover(void) migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, MIGRATION_STATUS_COMPLETED); + bdrv_stop_replication_all(true, &local_err); + if (local_err) { + error_report_err(local_err); + } + trace_colo_stop_block_replication("failover"); + if (!autostart) { error_report("\"-S\" qemu option will be ignored in secondary side"); /* recover runstate to normal migration finish state */ @@ -108,6 +116,7 @@ static void primary_vm_do_failover(void) { MigrationState *s = migrate_get_current(); int old_state; + Error *local_err = NULL; migrate_set_state(&s->state, MIGRATION_STATUS_COLO, MIGRATION_STATUS_COMPLETED); @@ -136,6 +145,12 @@ static void primary_vm_do_failover(void) /* Flush the residuary buffered packts */ qemu_release_default_filters_packets(); + bdrv_stop_replication_all(true, &local_err); + if (local_err) { + error_report_err(local_err); + } + trace_colo_stop_block_replication("failover"); + /* Notify COLO thread that failover work is finished */ qemu_sem_post(&s->colo_sem); } @@ -297,6 +312,14 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } + /* we 
call this api although this may do nothing on primary side */ + qemu_mutex_lock_iothread(); + bdrv_do_checkpoint_all(&local_err); + qemu_mutex_unlock_iothread(); + if (local_err) { + goto out; + } + colo_put_cmd(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, &local_err); if (local_err) { goto out; @@ -345,6 +368,10 @@ static int colo_do_checkpoint_transaction(MigrationState *s, qemu_release_default_filters_packets(); if (colo_shutdown) { + qemu_mutex_lock_iothread(); + bdrv_stop_replication_all(false, NULL); + trace_colo_stop_block_replication("shutdown"); + qemu_mutex_unlock_iothread(); colo_put_cmd(s->to_dst_file, COLO_COMMAND_GUEST_SHUTDOWN, &local_err); if (local_err) { goto out; @@ -439,6 +466,13 @@ static void colo_process_checkpoint(MigrationState *s) } qemu_mutex_lock_iothread(); + /* start block replication */ + bdrv_start_replication_all(REPLICATION_MODE_PRIMARY, &local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } + trace_colo_start_block_replication(); vm_start(); qemu_mutex_unlock_iothread(); trace_colo_vm_state_change("stop", "run"); @@ -534,6 +568,8 @@ static void colo_wait_handle_cmd(QEMUFile *f, int *checkpoint_request, case COLO_COMMAND_GUEST_SHUTDOWN: qemu_mutex_lock_iothread(); vm_stop_force_state(RUN_STATE_COLO); + bdrv_stop_replication_all(false, NULL); + trace_colo_stop_block_replication("shutdown"); qemu_system_shutdown_request_core(); qemu_mutex_unlock_iothread(); /* the main thread will exit and terminate the whole @@ -603,6 +639,15 @@ void *colo_process_incoming_thread(void *opaque) goto out; } + qemu_mutex_lock_iothread(); + /* start block replication */ + bdrv_start_replication_all(REPLICATION_MODE_SECONDARY, &local_err); + qemu_mutex_unlock_iothread(); + if (local_err) { + goto out; + } + trace_colo_start_block_replication(); + colo_put_cmd(mis->to_src_file, COLO_COMMAND_CHECKPOINT_READY, &local_err); if (local_err) { @@ -680,6 +725,13 @@ void *colo_process_incoming_thread(void *opaque) 
qemu_mutex_unlock_iothread(); goto out; } + /* discard colo disk buffer */ + bdrv_do_checkpoint_all(&local_err); + if (local_err) { + vmstate_loading = false; + qemu_mutex_unlock_iothread(); + goto out; + } vmstate_loading = false; qemu_mutex_unlock_iothread(); diff --git a/trace-events b/trace-events index a50bf45..e04acde 100644 --- a/trace-events +++ b/trace-events @@ -1586,6 +1586,8 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'" colo_put_cmd(const char *msg) "Send '%s' cmd" colo_get_cmd(const char *msg) "Receive '%s' cmd" colo_failover_set_state(int new_state) "new state %d" +colo_start_block_replication(void) "Block replication is started" +colo_stop_block_replication(const char *reason) "Block replication is stopped(reason: '%s')" # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" -- 1.8.3.1