Make sure the master starts block replication after the slave's block replication has started.
Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com> Signed-off-by: Wen Congyang <we...@cn.fujitsu.com> Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com> --- migration/colo.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ trace-events | 2 ++ 2 files changed, 62 insertions(+) diff --git a/migration/colo.c b/migration/colo.c index b1b7905..c534ff9 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -20,6 +20,7 @@ #include "migration/failover.h" #include "qapi-event.h" #include "net/filter.h" +#include "block/block_int.h" /* * The delay time before qemu begin the procedure of default failover treatment. @@ -62,6 +63,7 @@ static void secondary_vm_do_failover(void) { int old_state; MigrationIncomingState *mis = migration_incoming_get_current(); + Error *local_err = NULL; /* Can not do failover during the process of VM's loading VMstate, Or * it will break the secondary VM. @@ -79,6 +81,12 @@ static void secondary_vm_do_failover(void) migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, MIGRATION_STATUS_COMPLETED); + bdrv_stop_replication_all(true, &local_err); + if (local_err) { + error_report_err(local_err); + } + trace_colo_stop_block_replication("failover"); + if (!autostart) { error_report("\"-S\" qemu option will be ignored in secondary side"); /* recover runstate to normal migration finish state */ @@ -110,6 +118,7 @@ static void primary_vm_do_failover(void) { MigrationState *s = migrate_get_current(); int old_state; + Error *local_err = NULL; if (s->state != MIGRATION_STATUS_FAILED) { migrate_set_state(&s->state, MIGRATION_STATUS_COLO, @@ -134,6 +143,12 @@ static void primary_vm_do_failover(void) } /* Don't buffer any packets while exited COLO */ qemu_set_default_filter_buffers(false); + + bdrv_stop_replication_all(true, &local_err); + if (local_err) { + error_report_err(local_err); + } + trace_colo_stop_block_replication("failover"); } void colo_do_failover(MigrationState *s) @@ -212,6 +227,7 @@ static int 
colo_do_checkpoint_transaction(MigrationState *s, int colo_shutdown; size_t size; QEMUFile *trans = NULL; + Error *local_err = NULL; ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_CHECKPOINT_REQUEST, 0); if (ret < 0) { @@ -250,6 +266,16 @@ static int colo_do_checkpoint_transaction(MigrationState *s, goto out; } + /* we call this api although this may do nothing on primary side */ + qemu_mutex_lock_iothread(); + bdrv_do_checkpoint_all(&local_err); + qemu_mutex_unlock_iothread(); + if (local_err) { + error_report_err(local_err); + ret = -1; + goto out; + } + ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, 0); if (ret < 0) { goto out; @@ -296,6 +322,10 @@ static int colo_do_checkpoint_transaction(MigrationState *s, qemu_release_default_filters_packets(); if (colo_shutdown) { + qemu_mutex_lock_iothread(); + bdrv_stop_replication_all(false, NULL); + trace_colo_stop_block_replication("shutdown"); + qemu_mutex_unlock_iothread(); colo_ctl_put(s->to_dst_file, COLO_COMMAND_GUEST_SHUTDOWN, 0); qemu_fflush(s->to_dst_file); colo_shutdown_requested = 0; @@ -341,6 +371,7 @@ static void colo_process_checkpoint(MigrationState *s) int64_t error_time; int ret = 0; uint64_t value; + Error *local_err = NULL; failover_init_state(); @@ -376,6 +407,15 @@ static void colo_process_checkpoint(MigrationState *s) qemu_set_default_filter_buffers(true); qemu_mutex_lock_iothread(); + /* start block replication */ + bdrv_start_replication_all(REPLICATION_MODE_PRIMARY, &local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + error_report_err(local_err); + ret = -EINVAL; + goto out; + } + trace_colo_start_block_replication(); vm_start(); qemu_mutex_unlock_iothread(); trace_colo_vm_state_change("stop", "run"); @@ -492,6 +532,8 @@ static int colo_wait_handle_cmd(QEMUFile *f, int *checkpoint_request) case COLO_COMMAND_GUEST_SHUTDOWN: qemu_mutex_lock_iothread(); vm_stop_force_state(RUN_STATE_COLO); + bdrv_stop_replication_all(false, NULL); + 
trace_colo_stop_block_replication("shutdown"); qemu_system_shutdown_request_core(); qemu_mutex_unlock_iothread(); /* the main thread will exit and termiante the whole @@ -524,6 +566,7 @@ void *colo_process_incoming_thread(void *opaque) int64_t error_time, current_time; int ret = 0; uint64_t value; + Error *local_err = NULL; migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COLO); @@ -560,6 +603,16 @@ void *colo_process_incoming_thread(void *opaque) goto out; } + qemu_mutex_lock_iothread(); + /* start block replication */ + bdrv_start_replication_all(REPLICATION_MODE_SECONDARY, &local_err); + qemu_mutex_unlock_iothread(); + if (local_err) { + error_report_err(local_err); + goto out; + } + trace_colo_start_block_replication(); + ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_CHECKPOINT_READY, 0); if (ret < 0) { goto out; @@ -639,6 +692,13 @@ void *colo_process_incoming_thread(void *opaque) qemu_mutex_unlock_iothread(); goto out; } + /* discard colo disk buffer */ + bdrv_do_checkpoint_all(&local_err); + qemu_mutex_unlock_iothread(); + if (local_err) { + vmstate_loading = false; + goto out; + } vmstate_loading = false; qemu_mutex_unlock_iothread(); diff --git a/trace-events b/trace-events index b80c1e0..5f95b3c 100644 --- a/trace-events +++ b/trace-events @@ -1583,6 +1583,8 @@ colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'" colo_ctl_put(const char *msg, uint64_t value) "Send '%s' cmd, value: %" PRIu64"" colo_ctl_get(const char *msg, uint64_t value) "Receive '%s' cmd, value: %" PRIu64"" colo_failover_set_state(int new_state) "new state %d" +colo_start_block_replication(void) "Block replication is started" +colo_stop_block_replication(const char *reason) "Block replication is stopped(reason: '%s')" # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" -- 1.8.3.1