Only do a checkpoint when the PVM's and SVM's output net packets are inconsistent. We also limit the minimum time between two consecutive checkpoint actions, to give the VM a chance to run.
Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com> Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com> --- include/net/colo-nic.h | 2 ++ migration/colo.c | 32 ++++++++++++++++++++++++++++++++ net/colo-nic.c | 5 +++++ 3 files changed, 39 insertions(+) diff --git a/include/net/colo-nic.h b/include/net/colo-nic.h index 7b8ff57..271ab8b 100644 --- a/include/net/colo-nic.h +++ b/include/net/colo-nic.h @@ -30,4 +30,6 @@ void colo_remove_nic_devices(COLONicState *cns); int colo_proxy_init(enum COLOMode mode); void colo_proxy_destroy(enum COLOMode mode); +int colo_proxy_compare(void); + #endif diff --git a/migration/colo.c b/migration/colo.c index c286152..f9f2156 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -19,6 +19,13 @@ #include "qapi-event.h" #include "net/colo-nic.h" +/* +* We should not do checkpoint one after another without any time interval, +* Because this will lead continuous 'stop' status for VM. +* CHECKPOINT_MIN_PERIOD is the min time limit between two checkpoint action. 
+*/ +#define CHECKPOINT_MIN_PERIOD 100 /* unit: ms */ + /* Fix me: Convert to use QAPI */ typedef enum COLOCommand { COLO_CHECPOINT_READY = 0x46, @@ -341,6 +348,7 @@ static void *colo_thread(void *opaque) { MigrationState *s = opaque; QEMUFile *colo_control = NULL; + int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); int i, ret; failover_init_state(); @@ -376,15 +384,39 @@ static void *colo_thread(void *opaque) trace_colo_vm_state_change("stop", "run"); while (s->state == MIGRATION_STATUS_COLO) { + int proxy_checkpoint_req; + if (failover_request_is_active()) { error_report("failover request"); goto out; } + /* wait for a colo checkpoint */ + proxy_checkpoint_req = colo_proxy_compare(); + if (proxy_checkpoint_req < 0) { + goto out; + } else if (!proxy_checkpoint_req) { + /* + * No checkpoint is needed, wait for 1ms and then + * check if we need checkpoint again + */ + g_usleep(1000); + continue; + } else { + int64_t interval; + + current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); + interval = current_time - checkpoint_time; + if (interval < CHECKPOINT_MIN_PERIOD) { + /* Limit the min time between two checkpoint */ + g_usleep((1000*(CHECKPOINT_MIN_PERIOD - interval))); + } + } /* start a colo checkpoint */ if (colo_do_checkpoint_transaction(s, colo_control)) { goto out; } + checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); } out: diff --git a/net/colo-nic.c b/net/colo-nic.c index 49380ce..d2073b8 100644 --- a/net/colo-nic.c +++ b/net/colo-nic.c @@ -427,3 +427,8 @@ void colo_proxy_destroy(enum COLOMode mode) } teardown_nic(mode, getpid()); } + +int colo_proxy_compare(void) +{ + return atomic_xchg(&packet_compare_different, 0); +} -- 1.8.3.1