Call the resume/checkpoint/suspend callbacks while the secondary VM's state is consistent with the primary's.
Signed-off-by: Yang Hongyang <yan...@cn.fujitsu.com> Signed-off-by: Wen Congyang <we...@cn.fujitsu.com> CC: Andrew Cooper <andrew.coop...@citrix.com> --- tools/libxc/xc_sr_common.h | 19 ++++++++++++-- tools/libxc/xc_sr_restore.c | 63 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/tools/libxc/xc_sr_common.h b/tools/libxc/xc_sr_common.h index 88ef135..229ba0a 100644 --- a/tools/libxc/xc_sr_common.h +++ b/tools/libxc/xc_sr_common.h @@ -132,8 +132,11 @@ struct xc_sr_restore_ops * * @return 0 for success, -1 for failure, or the sentinel value * RECORD_NOT_PROCESSED. + * BROKEN_CHANNEL: if we are under Remus/COLO, this means that the master + * may dead, we will failover. */ #define RECORD_NOT_PROCESSED 1 +#define BROKEN_CHANNEL 2 int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec); /** @@ -164,6 +167,18 @@ struct xc_sr_context xc_dominfo_t dominfo; + /* + * migration stream + * 0: Plain VM + * 1: Remus + * 2: COLO + */ + enum { + MIG_STREAM_PLAIN, + MIG_STREAM_REMUS, + MIG_STREAM_COLO, + } migration_stream; + union /* Common save or restore data. */ { struct /* Save data. */ @@ -206,13 +221,13 @@ struct xc_sr_context uint32_t guest_page_size; /* Plain VM, or checkpoints over time. */ - bool checkpointed; + int checkpointed; /* Currently buffering records between a checkpoint */ bool buffer_all_records; /* - * With Remus, we buffer the records sent by the primary at checkpoint, + * With Remus/COLO, we buffer the records sent by the primary at checkpoint, * in case the primary will fail, we can recover from the last * checkpoint state. 
* This should be enough for most of the cases because primary only send diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c index e6f00db..2ce207c 100644 --- a/tools/libxc/xc_sr_restore.c +++ b/tools/libxc/xc_sr_restore.c @@ -1,4 +1,5 @@ #include <arpa/inet.h> +#include <assert.h> #include <assert.h> @@ -446,6 +447,49 @@ static int handle_checkpoint(struct xc_sr_context *ctx) else ctx->restore.buffer_all_records = true; + if ( ctx->restore.checkpointed == MIG_STREAM_COLO ) + { +#define HANDLE_CALLBACK_RETURN_VALUE(ret) \ + do { \ + if ( ret == 1 ) \ + rc = 0; /* Success */ \ + else \ + { \ + if ( ret == 2 ) \ + rc = BROKEN_CHANNEL; \ + else \ + rc = -1; /* Some unspecified error */ \ + goto err; \ + } \ + } while (0) + + /* COLO */ + + /* We need to resume guest */ + rc = ctx->restore.ops.stream_complete(ctx); + if ( rc ) + goto err; + + /* TODO: call restore_results */ + + /* Resume secondary vm */ + ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data); + HANDLE_CALLBACK_RETURN_VALUE(ret); + + /* Wait for a new checkpoint */ + ret = ctx->restore.callbacks->should_checkpoint( + ctx->restore.callbacks->data); + HANDLE_CALLBACK_RETURN_VALUE(ret); + + /* suspend secondary vm */ + ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data); + HANDLE_CALLBACK_RETURN_VALUE(ret); + +#undef HANDLE_CALLBACK_RETURN_VALUE + + /* TODO: send dirty bitmap to primary */ + } + err: return rc; } @@ -608,6 +652,8 @@ static int restore(struct xc_sr_context *ctx) goto err; } } + else if ( rc == BROKEN_CHANNEL ) + goto remus_failover; else if ( rc ) goto err; } @@ -615,6 +661,15 @@ static int restore(struct xc_sr_context *ctx) } while ( rec.type != REC_TYPE_END ); remus_failover: + + if ( ctx->restore.checkpointed == MIG_STREAM_COLO ) + { + /* With COLO, we have already called stream_complete */ + rc = 0; + IPRINTF("COLO Failover"); + goto done; + } + /* * With Remus, if we reach here, there must be some error on primary, * failover from the 
last checkpoint state. @@ -669,6 +724,14 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom, if (checkpointed_stream) assert(callbacks->checkpoint); + if ( ctx.restore.checkpointed == MIG_STREAM_COLO ) + { + /* this is COLO restore */ + assert(callbacks->suspend && + callbacks->postcopy && + callbacks->should_checkpoint); + } + IPRINTF("In experimental %s", __func__); DPRINTF("fd %d, dom %u, hvm %u, pae %u, superpages %d" ", checkpointed_stream %d", io_fd, dom, hvm, pae, -- 1.9.1 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel