* Zhang Chen (zhangc...@gmail.com) wrote: > For COLO FT, both the PVM and SVM run at the same time, > only sync the state while it needs. > > So here, let SVM runs while not doing checkpoint, change > DEFAULT_MIGRATE_X_CHECKPOINT_DELAY to 200*100. > > Besides, we forgot to release colo_checkpoint_semd and > colo_delay_timer, fix them here. > > Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com> > Signed-off-by: Zhang Chen <zhangc...@gmail.com> > Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > --- > migration/colo.c | 42 ++++++++++++++++++++++++++++++++++++++++-- > migration/migration.c | 4 ++-- > 2 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/migration/colo.c b/migration/colo.c > index 4381067ed4..081df1835f 100644 > --- a/migration/colo.c > +++ b/migration/colo.c > @@ -25,8 +25,11 @@ > #include "qemu/error-report.h" > #include "migration/failover.h" > #include "replication.h" > +#include "net/colo-compare.h" > +#include "net/colo.h" > > static bool vmstate_loading; > +static Notifier packets_compare_notifier; > > #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) > > @@ -343,6 +346,11 @@ static int colo_do_checkpoint_transaction(MigrationState > *s, > goto out; > } > > + colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err); > + if (local_err) { > + goto out; > + } > + > /* Disable block migration */ > migrate_set_block_enabled(false, &local_err); > qemu_savevm_state_header(fb); > @@ -400,6 +408,11 @@ out: > return ret; > } > > +static void colo_compare_notify_checkpoint(Notifier *notifier, void *data) > +{ > + colo_checkpoint_notify(data); > +} > + > static void colo_process_checkpoint(MigrationState *s) > { > QIOChannelBuffer *bioc; > @@ -416,6 +429,9 @@ static void colo_process_checkpoint(MigrationState *s) > goto out; > } > > + packets_compare_notifier.notify = colo_compare_notify_checkpoint; > + colo_compare_register_notifier(&packets_compare_notifier); > + > /* > * Wait for Secondary finish loading VM states and enter COLO > * restore. > @@ -461,11 +477,21 @@ out: > qemu_fclose(fb); > } > > - timer_del(s->colo_delay_timer); > - > /* Hope this not to be too long to wait here */ > qemu_sem_wait(&s->colo_exit_sem); > qemu_sem_destroy(&s->colo_exit_sem); > + > + /* > + * It is safe to unregister notifier after failover finished. > + * Besides, colo_delay_timer and colo_checkpoint_sem can't be > + * released befor unregister notifier, or there will be use-after-free > + * error. > + */ > + colo_compare_unregister_notifier(&packets_compare_notifier); > + timer_del(s->colo_delay_timer); > + timer_free(s->colo_delay_timer); > + qemu_sem_destroy(&s->colo_checkpoint_sem); > + > /* > * Must be called after failover BH is completed, > * Or the failover BH may shutdown the wrong fd that > @@ -558,6 +584,11 @@ void *colo_process_incoming_thread(void *opaque) > fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); > object_unref(OBJECT(bioc)); > > + qemu_mutex_lock_iothread(); > + vm_start(); > + trace_colo_vm_state_change("stop", "run"); > + qemu_mutex_unlock_iothread(); > + > colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, > &local_err); > if (local_err) { > @@ -577,6 +608,11 @@ void *colo_process_incoming_thread(void *opaque) > goto out; > } > > + qemu_mutex_lock_iothread(); > + vm_stop_force_state(RUN_STATE_COLO); > + trace_colo_vm_state_change("run", "stop"); > + qemu_mutex_unlock_iothread(); > + > /* FIXME: This is unnecessary for periodic checkpoint mode */ > colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, > &local_err); > @@ -630,6 +666,8 @@ void *colo_process_incoming_thread(void *opaque) > } > > vmstate_loading = false; > + vm_start(); > + trace_colo_vm_state_change("stop", "run"); > qemu_mutex_unlock_iothread(); > > if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { > diff --git a/migration/migration.c b/migration/migration.c > index 35f2781b03..bca187275a 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -76,9 +76,9 @@ > #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) > > /* The delay time (in ms) between two COLO checkpoints > - * Note: Please change this default value to 10000 when we support hybrid > mode. > + * Note: Please change this default value to 20000 when we support hybrid > mode.
You can remove that comment now? Dave > */ > -#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 > +#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) > #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 > #define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16 > > -- > 2.17.0 > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK