* zhanghailiang (zhang.zhanghaili...@huawei.com) wrote: > We leave users free to choose whatever heartbeat solution they want. If the > heartbeat > is lost, or they detect other errors, they can use the experimental command > 'x_colo_lost_heartbeat' to tell COLO to do failover, and COLO will act > accordingly. > > For example, if the command is sent to the PVM, the Primary side will > exit COLO mode and take over operation. If sent to the Secondary, the > Secondary will run failover work, then take over server operation to > become the new Primary. > > Cc: Luiz Capitulino <lcapitul...@redhat.com> > Cc: Eric Blake <ebl...@redhat.com> > Cc: Markus Armbruster <arm...@redhat.com> > Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com> > Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > --- > v11: > - Add more comments for x-colo-lost-heartbeat command (Eric's suggestion) > - Return 'enum' instead of 'int' for get_colo_mode() (Eric's suggestion) > v10: > - Rename command colo_lost_hearbeat to experimental 'x_colo_lost_heartbeat' > > Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com> > --- > hmp-commands.hx | 15 +++++++++++++++ > hmp.c | 8 ++++++++ > hmp.h | 1 + > include/migration/colo.h | 3 +++ > include/migration/failover.h | 20 ++++++++++++++++++++ > migration/Makefile.objs | 2 +- > migration/colo-comm.c | 11 +++++++++++ > migration/colo-failover.c | 41 +++++++++++++++++++++++++++++++++++++++++ > migration/colo.c | 1 + > qapi-schema.json | 29 +++++++++++++++++++++++++++++ > qmp-commands.hx | 19 +++++++++++++++++++ > stubs/migration-colo.c | 8 ++++++++ > 12 files changed, 157 insertions(+), 1 deletion(-) > create mode 100644 include/migration/failover.h > create mode 100644 migration/colo-failover.c > > diff --git a/hmp-commands.hx b/hmp-commands.hx > index bb52e4d..a381b0b 100644 > --- a/hmp-commands.hx > +++ b/hmp-commands.hx > @@ -1039,6 +1039,21 @@ migration (or once already in postcopy). > ETEXI > > { > + .name = "x_colo_lost_heartbeat", > + .args_type = "", > + .params = "", > + .help = "Tell COLO that heartbeat is lost,\n\t\t\t" > + "a failover or takeover is needed.", > + .mhandler.cmd = hmp_x_colo_lost_heartbeat, > + }, > + > +STEXI > +@item x_colo_lost_heartbeat > +@findex x_colo_lost_heartbeat > +Tell COLO that heartbeat is lost, a failover or takeover is needed. 
> +ETEXI > + > + { > .name = "client_migrate_info", > .args_type = > "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", > .params = "protocol hostname port tls-port cert-subject", > diff --git a/hmp.c b/hmp.c > index ee87d38..dc6dc30 100644 > --- a/hmp.c > +++ b/hmp.c > @@ -1310,6 +1310,14 @@ void hmp_migrate_start_postcopy(Monitor *mon, const > QDict *qdict) > hmp_handle_error(mon, &err); > } > > +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict) > +{ > + Error *err = NULL; > + > + qmp_x_colo_lost_heartbeat(&err); > + hmp_handle_error(mon, &err); > +} > + > void hmp_set_password(Monitor *mon, const QDict *qdict) > { > const char *protocol = qdict_get_str(qdict, "protocol"); > diff --git a/hmp.h b/hmp.h > index a8c5b5a..864a300 100644 > --- a/hmp.h > +++ b/hmp.h > @@ -70,6 +70,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict > *qdict); > void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict); > void hmp_client_migrate_info(Monitor *mon, const QDict *qdict); > void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict); > +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict); > void hmp_set_password(Monitor *mon, const QDict *qdict); > void hmp_expire_password(Monitor *mon, const QDict *qdict); > void hmp_eject(Monitor *mon, const QDict *qdict); > diff --git a/include/migration/colo.h b/include/migration/colo.h > index 2676c4a..ba27719 100644 > --- a/include/migration/colo.h > +++ b/include/migration/colo.h > @@ -17,6 +17,7 @@ > #include "migration/migration.h" > #include "qemu/coroutine_int.h" > #include "qemu/thread.h" > +#include "qemu/main-loop.h" > > bool colo_supported(void); > void colo_info_mig_init(void); > @@ -29,4 +30,6 @@ bool migration_incoming_enable_colo(void); > void migration_incoming_exit_colo(void); > void *colo_process_incoming_thread(void *opaque); > bool migration_incoming_in_colo_state(void); > + > +COLOMode get_colo_mode(void); > #endif > diff --git 
a/include/migration/failover.h b/include/migration/failover.h > new file mode 100644 > index 0000000..1785b52 > --- /dev/null > +++ b/include/migration/failover.h > @@ -0,0 +1,20 @@ > +/* > + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) > + * (a.k.a. Fault Tolerance or Continuous Replication) > + * > + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD. > + * Copyright (c) 2015 FUJITSU LIMITED > + * Copyright (c) 2015 Intel Corporation > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * later. See the COPYING file in the top-level directory. > + */ > + > +#ifndef QEMU_FAILOVER_H > +#define QEMU_FAILOVER_H > + > +#include "qemu-common.h" > + > +void failover_request_active(Error **errp); > + > +#endif > diff --git a/migration/Makefile.objs b/migration/Makefile.objs > index 81b5713..920d1e7 100644 > --- a/migration/Makefile.objs > +++ b/migration/Makefile.objs > @@ -1,6 +1,6 @@ > common-obj-y += migration.o tcp.o > -common-obj-$(CONFIG_COLO) += colo.o > common-obj-y += colo-comm.o > +common-obj-$(CONFIG_COLO) += colo.o colo-failover.o > common-obj-y += vmstate.o > common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o > qemu-file-stdio.o > common-obj-y += xbzrle.o postcopy-ram.o > diff --git a/migration/colo-comm.c b/migration/colo-comm.c > index 30df3d3..58a6488 100644 > --- a/migration/colo-comm.c > +++ b/migration/colo-comm.c > @@ -20,6 +20,17 @@ typedef struct { > > static COLOInfo colo_info; > > +COLOMode get_colo_mode(void) > +{ > + if (migration_in_colo_state()) { > + return COLO_MODE_PRIMARY; > + } else if (migration_incoming_in_colo_state()) { > + return COLO_MODE_SECONDARY; > + } else { > + return COLO_MODE_UNKNOWN; > + } > +} > + > static void colo_info_pre_save(void *opaque) > { > COLOInfo *s = opaque; > diff --git a/migration/colo-failover.c b/migration/colo-failover.c > new file mode 100644 > index 0000000..e3897c6 > --- /dev/null > +++ b/migration/colo-failover.c > @@ -0,0 +1,41 @@ > +/* 
> + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) > + * (a.k.a. Fault Tolerance or Continuous Replication) > + * > + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO., LTD. > + * Copyright (c) 2015 FUJITSU LIMITED > + * Copyright (c) 2015 Intel Corporation > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * later. See the COPYING file in the top-level directory. > + */ > + > +#include "migration/colo.h" > +#include "migration/failover.h" > +#include "qmp-commands.h" > +#include "qapi/qmp/qerror.h" > + > +static QEMUBH *failover_bh; > + > +static void colo_failover_bh(void *opaque) > +{ > + qemu_bh_delete(failover_bh); > + failover_bh = NULL; > + /*TODO: Do failover work */ > +} > + > +void failover_request_active(Error **errp) > +{ > + failover_bh = qemu_bh_new(colo_failover_bh, NULL); > + qemu_bh_schedule(failover_bh); > +} > + > +void qmp_x_colo_lost_heartbeat(Error **errp) > +{ > + if (get_colo_mode() == COLO_MODE_UNKNOWN) { > + error_setg(errp, QERR_FEATURE_DISABLED, "colo"); > + return; > + } > + > + failover_request_active(errp); > +} > diff --git a/migration/colo.c b/migration/colo.c > index ca5df44..7098497 100644 > --- a/migration/colo.c > +++ b/migration/colo.c > @@ -17,6 +17,7 @@ > #include "trace.h" > #include "qemu/error-report.h" > #include "qemu/sockets.h" > +#include "migration/failover.h" > > /* colo buffer */ > #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) > diff --git a/qapi-schema.json b/qapi-schema.json > index a5699a7..feb7d53 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -761,6 +761,35 @@ > 'vmstate-send', 'vmstate-size','vmstate-received', > 'vmstate-loaded' ] } > > +## > +# @COLOMode > +# > +# The colo mode > +# > +# @unknown: unknown mode > +# > +# @primary: master side > +# > +# @secondary: slave side > +# > +# Since: 2.6 > +## > +{ 'enum': 'COLOMode', > + 'data': [ 'unknown', 'primary', 'secondary'] } > + > +## > +# @x-colo-lost-heartbeat > +# > +# Tell qemu that 
heartbeat is lost, request it to do takeover procedures. > +# If this command is sent to the PVM, the Primary side will exit COLO mode. > +# If sent to the Secondary, the Secondary side will run failover work, > +# then takes over server operation to become the service VM. > +# > +# Since: 2.6 > +## > +{ 'command': 'x-colo-lost-heartbeat' } > + > +## > # @MouseInfo: > # > # Information about a mouse device. > diff --git a/qmp-commands.hx b/qmp-commands.hx > index 89756c9..76ad208 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -805,6 +805,25 @@ Example: > EQMP > > { > + .name = "x-colo-lost-heartbeat", > + .args_type = "", > + .mhandler.cmd_new = qmp_marshal_x_colo_lost_heartbeat, > + }, > + > +SQMP > +x-colo-lost-heartbeat > +-------------------- > + > +Tell COLO that heartbeat is lost, a failover or takeover is needed. > + > +Example: > + > +-> { "execute": "x-colo-lost-heartbeat" } > +<- { "return": {} } > + > +EQMP > + > + { > .name = "client_migrate_info", > .args_type = > "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", > .params = "protocol hostname port tls-port cert-subject", > diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c > index c12516e..5028f63 100644 > --- a/stubs/migration-colo.c > +++ b/stubs/migration-colo.c > @@ -11,6 +11,7 @@ > */ > > #include "migration/colo.h" > +#include "qmp-commands.h" > > bool colo_supported(void) > { > @@ -35,3 +36,10 @@ void *colo_process_incoming_thread(void *opaque) > { > return NULL; > } > + > +void qmp_x_colo_lost_heartbeat(Error **errp) > +{ > + error_setg(errp, "COLO is not supported, please rerun configure" > + " with --enable-colo option in order to support" > + " COLO feature"); > +} > -- > 1.8.3.1 > > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK