* zhanghailiang (zhang.zhanghaili...@huawei.com) wrote: > Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com> > Signed-off-by: Gao feng <gaof...@cn.fujitsu.com> > --- > include/net/colo-nic.h | 3 ++- > migration/colo.c | 22 ++++++++++++++++++---- > net/colo-nic.c | 19 +++++++++++++++++++ > 3 files changed, 39 insertions(+), 5 deletions(-) > > diff --git a/include/net/colo-nic.h b/include/net/colo-nic.h > index 67c9807..ddc21cd 100644 > --- a/include/net/colo-nic.h > +++ b/include/net/colo-nic.h > @@ -20,5 +20,6 @@ void colo_add_nic_devices(NetClientState *nc); > void colo_remove_nic_devices(NetClientState *nc); > > int colo_proxy_compare(void); > - > +int colo_proxy_failover(void); > +int colo_proxy_checkpoint(void); > #endif > diff --git a/migration/colo.c b/migration/colo.c > index 579aabf..874971c 100644 > --- a/migration/colo.c > +++ b/migration/colo.c > @@ -94,6 +94,11 @@ static void slave_do_failover(void) > ; > } > > + if (colo_proxy_failover() != 0) { > + error_report("colo proxy failed to do failover"); > + } > + colo_proxy_destroy(COLO_SECONDARY_MODE);
I'm not sure if this is the best thing to do on a secondary failover. If I understand correctly, when it's running, we have: -------+ | br0---eth0 | slave +-tun - xt_SECCOLO - br1---eth1 | -------+ What I think colo_proxy_destroy is doing is rewiring that as: -------+ | +--------------br0---eth0 | | slave +-tun + br1---eth1 | -------+ But now we've lost the sequence number adjustment data that was held in xt_SECCOLO, and so you are likely to break existing TCP connections. Also, I don't think colo-proxy-script is passed a flag to let it know whether the reason it's doing a slave_uninstall is a failover or a simple shutdown, and so it assumes it has to do the rewire for a failover. (Actually, the script in the qemu repo is newer than the script in the colo-proxy repo; that one doesn't have the rewire at all.) Dave > + > colo = NULL; > > if (!autostart) { > @@ -115,7 +120,7 @@ static void master_do_failover(void) > if (!colo_runstate_is_stopped()) { > vm_stop_force_state(RUN_STATE_COLO); > } > - > + colo_proxy_destroy(COLO_PRIMARY_MODE); > if (s->state != MIG_STATE_ERROR) { > migrate_set_state(s, MIG_STATE_COLO, MIG_STATE_COMPLETED); > } > @@ -245,6 +250,11 @@ static int do_colo_transaction(MigrationState *s, > QEMUFile *control) > > qemu_fflush(trans); > > + ret = colo_proxy_checkpoint(); > + if (ret < 0) { > + goto out; > + } > + > ret = colo_ctl_put(s->file, COLO_CHECKPOINT_SEND); > if (ret < 0) { > goto out; > @@ -387,8 +397,6 @@ out: > qemu_bh_schedule(s->cleanup_bh); > qemu_mutex_unlock_iothread(); > > - colo_proxy_destroy(COLO_PRIMARY_MODE); > - > return NULL; > } > > @@ -508,6 +516,12 @@ void *colo_process_incoming_checkpoints(void *opaque) > goto out; > } > > + ret = colo_proxy_checkpoint(); > + if (ret < 0) { > + goto out; > + } > + DPRINTF("proxy begin to do checkpoint\n"); > + > ret = colo_ctl_get(f, COLO_CHECKPOINT_SEND); > if (ret < 0) { > goto out; > @@ -584,6 +598,7 @@ out: > * just kill slave > */ > error_report("SVM is going to exit!"); > 
+ colo_proxy_destroy(COLO_SECONDARY_MODE); > exit(1); > } else { > /* if we went here, means master may dead, we are doing failover */ > @@ -610,6 +625,5 @@ out: > > loadvm_exit_colo(); > > - colo_proxy_destroy(COLO_SECONDARY_MODE); > return NULL; > } > diff --git a/net/colo-nic.c b/net/colo-nic.c > index 563d661..02a454d 100644 > --- a/net/colo-nic.c > +++ b/net/colo-nic.c > @@ -379,6 +379,25 @@ void colo_proxy_destroy(int side) > cp_info.index = -1; > colo_nic_side = -1; > } > + > +int colo_proxy_failover(void) > +{ > + if (colo_proxy_send(NULL, 0, COLO_FAILOVER) < 0) { > + return -1; > + } > + > + return 0; > +} > + > +int colo_proxy_checkpoint(void) > +{ > + if (colo_proxy_send(NULL, 0, COLO_CHECKPOINT) < 0) { > + return -1; > + } > + > + return 0; > +} > + > /* > do checkpoint: return 1 > error: return -1 > -- > 1.7.12.4 > > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK