Use qemu block replication as our block replication solution. Note that the guest must be paused before starting COLO; otherwise, the disk won't be consistent between the primary and the secondary.
Signed-off-by: Wen Congyang <we...@cn.fujitsu.com> for commit message, Signed-off-by: Yang Hongyang <hongyang.y...@easystack.cn> --- tools/libxl/Makefile | 1 + tools/libxl/libxl_colo_qdisk.c | 262 +++++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_colo_restore.c | 31 ++++- tools/libxl/libxl_colo_save.c | 45 ++++++- tools/libxl/libxl_internal.h | 34 +++++ tools/libxl/libxl_qmp.c | 93 ++++++++++++++ 6 files changed, 462 insertions(+), 4 deletions(-) create mode 100644 tools/libxl/libxl_colo_qdisk.c diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index b11cf34..a4156c1 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -64,6 +64,7 @@ endif LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o +LIBXL_OBJS-y += libxl_colo_qdisk.o LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o diff --git a/tools/libxl/libxl_colo_qdisk.c b/tools/libxl/libxl_colo_qdisk.c new file mode 100644 index 0000000..d5de278 --- /dev/null +++ b/tools/libxl/libxl_colo_qdisk.c @@ -0,0 +1,262 @@ +/* + * Copyright (C) 2015 FUJITSU LIMITED + * Author: Wen Congyang <we...@cn.fujitsu.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" + +typedef struct libxl__colo_qdisk { + bool setuped; +} libxl__colo_qdisk; + +/* ========== init() and cleanup() ========== */ +int init_subkind_qdisk(libxl__checkpoint_devices_state *cds) +{ + /* + * We don't know if we use qemu block replication, so + * we cannot start block replication here. + */ + return 0; +} + +void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds) +{ +} + +/* ========== setup() and teardown() ========== */ +static void colo_qdisk_setup(libxl__egc *egc, libxl__checkpoint_device *dev, + bool primary) +{ + const libxl_device_disk *disk = dev->backend_dev; + int ret, rc = 0; + libxl__colo_qdisk *colo_qdisk = NULL; + + /* Convenience aliases */ + libxl__checkpoint_devices_state *const cds = dev->cds; + const char *host = disk->colo_host; + const char *port = disk->colo_port; + const char *export_name = disk->colo_export; + const int domid = cds->domid; + + STATE_AO_GC(dev->cds->ao); + + if (disk->backend != LIBXL_DISK_BACKEND_QDISK || + !libxl_defbool_val(disk->colo_enable)) { + rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH; + goto out; + } + + dev->matched = true; + + GCNEW(colo_qdisk); + dev->concrete_data = colo_qdisk; + + if (primary) { + libxl__colo_save_state *css = cds->concrete_data; + + css->qdisk_used = true; + /* NBD server is not ready, so we cannot start block replication now */ + goto out; + } else { + libxl__colo_restore_state *crs = cds->concrete_data; + + if (!crs->qdisk_used) { + /* start nbd server */ + ret = libxl__qmp_nbd_server_start(gc, domid, host, port); + if (ret) { + rc = ERROR_FAIL; + goto out; + } + crs->host = host; + crs->port = port; + } else { + if (strcmp(crs->host, host) || strcmp(crs->port, port)) { + LOG(ERROR, "The host and port of all disks must be the same"); + rc = ERROR_FAIL; + goto out; + } + } + + crs->qdisk_used = true; + + ret = libxl__qmp_nbd_server_add(gc, domid, export_name); + if 
(ret) + rc = ERROR_FAIL; + + colo_qdisk->setuped = true; + } + +out: + dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +static void colo_qdisk_teardown(libxl__egc *egc, libxl__checkpoint_device *dev, + bool primary) +{ + int ret, rc = 0; + const libxl__colo_qdisk *colo_qdisk = dev->concrete_data; + const libxl_device_disk *disk = dev->backend_dev; + + /* Convenience aliases */ + libxl__checkpoint_devices_state *const cds = dev->cds; + const int domid = cds->domid; + const char *export_name = disk->colo_export; + + EGC_GC; + + if (primary) { + libxl__colo_save_state *css = cds->concrete_data; + + if (css->qdisk_setuped) { + css->qdisk_setuped = false; + ret = libxl__qmp_block_stop_replication(gc, domid, false); + if (ret) + rc = ERROR_FAIL; + } + + if (!colo_qdisk->setuped) + goto out; + + /* + * There is no way to get the child name, but we know it is children.1 + */ + ret = libxl__qmp_x_blockdev_change(gc, domid, export_name, + "children.1", NULL); + if (ret) + rc = ERROR_FAIL; + } else { + libxl__colo_restore_state *crs = cds->concrete_data; + + if (crs->qdisk_setuped) { + crs->qdisk_setuped = false; + + ret = libxl__qmp_block_stop_replication(gc, domid, false); + if (ret) + rc = ERROR_FAIL; + } + + if (crs->qdisk_used) { + ret = libxl__qmp_nbd_server_stop(gc, domid); + if (ret) + rc = ERROR_FAIL; + } + } + +out: + dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +/* ========== checkpointing APIs ========== */ +/* should be called after libxl__checkpoint_device_instance_ops.preresume */ +int colo_qdisk_preresume(libxl_ctx *ctx, domid_t domid) +{ + GC_INIT(ctx); + int ret; + + ret = libxl__qmp_block_do_checkpoint(gc, domid); + + GC_FREE; + return ret; +} + +int colo_qdisk_start(libxl__egc *egc, domid_t domid, bool primary) +{ + EGC_GC; + + return libxl__qmp_block_start_replication(gc, domid, primary); +} + +static void colo_qdisk_save_preresume(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + libxl__colo_qdisk *colo_qdisk = 
dev->concrete_data; + const libxl_device_disk *disk = dev->backend_dev; + int ret, rc = 0; + char *node = NULL; + char *cmd = NULL; + + /* Convenience aliases */ + const int domid = dev->cds->domid; + const char *host = disk->colo_host; + const char *port = disk->colo_port; + const char *export_name = disk->colo_export; + + EGC_GC; + + if (colo_qdisk->setuped) + goto out; + + /* qmp command doesn't support the driver "nbd" */ + node = GCSPRINTF("colo_node%d", + libxl__device_disk_dev_number(disk->vdev, NULL, NULL)); + cmd = GCSPRINTF("drive_add buddy driver=replication,mode=primary," + "file.driver=nbd,file.host=%s,file.port=%s," + "file.export=%s,node-name=%s,if=none", + host, port, export_name, node); + ret = libxl__qmp_hmp(gc, domid, cmd); + if (ret) + rc = ERROR_FAIL; + + ret = libxl__qmp_x_blockdev_change(gc, domid, export_name, NULL, node); + if (ret) + rc = ERROR_FAIL; + + colo_qdisk->setuped = true; + +out: + dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +/* ======== primary ======== */ +static void colo_qdisk_save_setup(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_setup(egc, dev, true); +} + +static void colo_qdisk_save_teardown(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_teardown(egc, dev, true); +} + +const libxl__checkpoint_device_instance_ops colo_save_device_qdisk = { + .kind = LIBXL__DEVICE_KIND_VBD, + .setup = colo_qdisk_save_setup, + .teardown = colo_qdisk_save_teardown, + .preresume = colo_qdisk_save_preresume, +}; + +/* ======== secondary ======== */ +static void colo_qdisk_restore_setup(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_setup(egc, dev, false); +} + +static void colo_qdisk_restore_teardown(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_teardown(egc, dev, false); +} + +const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk = { + .kind = LIBXL__DEVICE_KIND_VBD, + .setup = colo_qdisk_restore_setup, + .teardown = 
colo_qdisk_restore_teardown, +}; diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c index f23ef8f..e5cfbe5 100644 --- a/tools/libxl/libxl_colo_restore.c +++ b/tools/libxl/libxl_colo_restore.c @@ -50,7 +50,10 @@ static void libxl__colo_restore_domain_checkpoint_callback(void *data); static void libxl__colo_restore_domain_should_checkpoint_callback(void *data); static void libxl__colo_restore_domain_suspend_callback(void *data); +extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk; + static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = { + &colo_restore_device_qdisk, NULL, }; @@ -150,7 +153,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds) int rc; STATE_AO_GC(cds->ao); + rc = init_subkind_qdisk(cds); + if (rc) goto out; + rc = 0; +out: return rc; } @@ -158,6 +165,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds) { /* cleanup device subkind-specific state in the libxl ctx */ STATE_AO_GC(cds->ao); + + cleanup_subkind_qdisk(cds); } @@ -217,6 +226,8 @@ void libxl__colo_restore_setup(libxl__egc *egc, GCNEW(crcs); crs->crcs = crcs; crcs->crs = crs; + crs->qdisk_setuped = false; + crs->qdisk_used = false; /* setup dsps */ crcs->dsps.ao = ao; @@ -585,6 +596,22 @@ static void colo_restore_preresume_cb(libxl__egc *egc, goto out; } + if (crs->qdisk_used && !crs->qdisk_setuped) { + if (colo_qdisk_start(egc, crs->domid, false)) { + LOG(ERROR, "starting block replication fails"); + goto out; + } + crs->qdisk_setuped = true; + } + + if (crs->qdisk_setuped) { + rc = colo_qdisk_preresume(CTX, crs->domid); + if (rc) { + LOG(ERROR, "colo_qdisk_preresume() fails"); + goto out; + } + } + colo_restore_resume_vm(egc, crcs); return; @@ -742,8 +769,8 @@ static void colo_setup_checkpoint_devices(libxl__egc *egc, STATE_AO_GC(crs->ao); - /* TODO: disk/nic support */ - cds->device_kind_flags = 0; + /* TODO: nic support */ + cds->device_kind_flags = (1 << 
LIBXL__DEVICE_KIND_VBD); cds->callback = colo_restore_setup_cds_done; cds->ao = ao; cds->domid = crs->domid; diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c index d6b4e7b..78fcc60 100644 --- a/tools/libxl/libxl_colo_save.c +++ b/tools/libxl/libxl_colo_save.c @@ -19,7 +19,10 @@ #include "libxl_internal.h" #include "libxl_colo.h" +extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk; + static const libxl__checkpoint_device_instance_ops *colo_ops[] = { + &colo_save_device_qdisk, NULL, }; @@ -30,7 +33,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds) int rc; STATE_AO_GC(cds->ao); + rc = init_subkind_qdisk(cds); + if (rc) goto out; + rc = 0; +out: return rc; } @@ -38,6 +45,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds) { /* cleanup device subkind-specific state in the libxl ctx */ STATE_AO_GC(cds->ao); + + cleanup_subkind_qdisk(cds); } /* ================= colo: setup save environment ================= */ @@ -65,9 +74,12 @@ void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css) css->send_fd = dss->fd; css->recv_fd = dss->recv_fd; css->svm_running = false; + css->paused = true; + css->qdisk_setuped = false; + css->qdisk_used = false; - /* TODO: disk/nic support */ - cds->device_kind_flags = 0; + /* TODO: nic support */ + cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD); cds->ops = colo_ops; cds->callback = colo_save_setup_done; cds->ao = ao; @@ -373,12 +385,41 @@ static void colo_preresume_cb(libxl__egc *egc, goto out; } + if (css->qdisk_used && !css->qdisk_setuped) { + if (colo_qdisk_start(egc, dss->domid, true)) { + LOG(ERROR, "starting block replication fails"); + goto out; + } + css->qdisk_setuped = true; + } + + if (!css->paused) { + rc = colo_qdisk_preresume(CTX, dss->domid); + if (rc) { + LOG(ERROR, "colo_qdisk_preresume() fails"); + goto out; + } + } + /* Resumes the domain and the device model */ if (libxl__domain_resume(gc, 
dss->domid, /* Fast Suspend */1)) { LOG(ERROR, "cannot resume primary vm"); goto out; } + /* + * The guest should be paused before doing colo because there is + * no disk migration. + */ + if (css->paused) { + rc = libxl_domain_unpause(CTX, dss->domid); + if (rc) { + LOG(ERROR, "cannot unpause primary vm"); + goto out; + } + css->paused = false; + } + /* read CHECKPOINT_SVM_RESUMED */ css->callback = colo_read_svm_resumed_done; css->srs.checkpoint_callback = colo_common_read_stream_done; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 54903af..ddf4980 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -1767,6 +1767,25 @@ _hidden int libxl__qmp_set_global_dirty_log(libxl__gc *gc, int domid, bool enabl _hidden int libxl__qmp_insert_cdrom(libxl__gc *gc, int domid, const libxl_device_disk *disk); /* Add a virtual CPU */ _hidden int libxl__qmp_cpu_add(libxl__gc *gc, int domid, int index); +/* Start NBD server */ +_hidden int libxl__qmp_nbd_server_start(libxl__gc *gc, int domid, + const char *host, const char *port); +/* Add a disk to NBD server */ +_hidden int libxl__qmp_nbd_server_add(libxl__gc *gc, int domid, const char *disk); +/* Start block replication */ +_hidden int libxl__qmp_block_start_replication(libxl__gc *gc, int domid, bool primary); +/* Do block checkpoint */ +_hidden int libxl__qmp_block_do_checkpoint(libxl__gc *gc, int domid); +/* Stop block replication */ +_hidden int libxl__qmp_block_stop_replication(libxl__gc *gc, int domid, + bool primary); +/* Stop NBD server */ +_hidden int libxl__qmp_nbd_server_stop(libxl__gc *gc, int domid); +/* Add or remove a child to/from quorum */ +_hidden int libxl__qmp_x_blockdev_change(libxl__gc *gc, int domid, const char *parant, + const char *child, const char *node); +/* run a hmp command in qmp mode */ +_hidden int libxl__qmp_hmp(libxl__gc *gc, int domid, const char *command_line); /* close and free the QMP handler */ _hidden void 
libxl__qmp_close(libxl__qmp_handler *qmp); /* remove the socket file, if the file has already been removed, @@ -2878,6 +2897,10 @@ int init_subkind_nic(libxl__checkpoint_devices_state *cds); void cleanup_subkind_nic(libxl__checkpoint_devices_state *cds); int init_subkind_drbd_disk(libxl__checkpoint_devices_state *cds); void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds); +int init_subkind_qdisk(libxl__checkpoint_devices_state *cds); +void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds); +int colo_qdisk_preresume(libxl_ctx *ctx, domid_t domid); +int colo_qdisk_start(libxl__egc *egc, domid_t domid, bool primary); typedef void libxl__checkpoint_callback(libxl__egc *, libxl__checkpoint_devices_state *, @@ -3095,6 +3118,11 @@ struct libxl__colo_save_state { libxl__stream_read_state srs; void (*callback)(libxl__egc *, libxl__colo_save_state *, int); bool svm_running; + bool paused; + + /* private, used by qdisk block replication */ + bool qdisk_used; + bool qdisk_setuped; }; /*----- Domain suspend (save) state structure -----*/ @@ -3500,6 +3528,12 @@ struct libxl__colo_restore_state { /* private, colo restore checkpoint state */ libxl__domain_create_cb *saved_cb; void *crcs; + + /* private, used by qdisk block replication */ + bool qdisk_used; + bool qdisk_setuped; + const char *host; + const char *port; }; struct libxl__domain_create_state { diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c index eec8a44..d5a8d7f 100644 --- a/tools/libxl/libxl_qmp.c +++ b/tools/libxl/libxl_qmp.c @@ -978,6 +978,99 @@ int libxl__qmp_cpu_add(libxl__gc *gc, int domid, int idx) return qmp_run_command(gc, domid, "cpu-add", args, NULL, NULL); } +int libxl__qmp_nbd_server_start(libxl__gc *gc, int domid, + const char *host, const char *port) +{ + libxl__json_object *args = NULL; + libxl__json_object *addr = NULL; + libxl__json_object *data = NULL; + + /* 'addr': { + * 'type': 'inet', + * 'data': { + * 'host': '$nbd_host', + * 'port': '$nbd_port' + * } + 
* } + */ + qmp_parameters_add_string(gc, &data, "host", host); + qmp_parameters_add_string(gc, &data, "port", port); + + qmp_parameters_add_string(gc, &addr, "type", "inet"); + qmp_parameters_common_add(gc, &addr, "data", data); + + qmp_parameters_common_add(gc, &args, "addr", addr); + + return qmp_run_command(gc, domid, "nbd-server-start", args, NULL, NULL); +} + +int libxl__qmp_nbd_server_add(libxl__gc *gc, int domid, const char *disk) +{ + libxl__json_object *args = NULL; + + qmp_parameters_add_string(gc, &args, "device", disk); + qmp_parameters_add_bool(gc, &args, "writable", true); + + return qmp_run_command(gc, domid, "nbd-server-add", args, NULL, NULL); +} + +int libxl__qmp_block_start_replication(libxl__gc *gc, int domid, bool primary) +{ + libxl__json_object *args = NULL; + + qmp_parameters_add_bool(gc, &args, "enable", true); + qmp_parameters_add_bool(gc, &args, "primary", primary); + + return qmp_run_command(gc, domid, "xen-set-block-replication", args, + NULL, NULL); +} + +int libxl__qmp_block_do_checkpoint(libxl__gc *gc, int domid) +{ + return qmp_run_command(gc, domid, "xen-do-block-checkpoint", NULL, + NULL, NULL); +} + +int libxl__qmp_block_stop_replication(libxl__gc *gc, int domid, bool primary) +{ + libxl__json_object *args = NULL; + + qmp_parameters_add_bool(gc, &args, "enable", false); + qmp_parameters_add_bool(gc, &args, "primary", primary); + + return qmp_run_command(gc, domid, "xen-set-block-replication", args, + NULL, NULL); +} + +int libxl__qmp_nbd_server_stop(libxl__gc *gc, int domid) +{ + return qmp_run_command(gc, domid, "nbd-server-stop", NULL, NULL, NULL); +} + +int libxl__qmp_x_blockdev_change(libxl__gc *gc, int domid, const char *parent, + const char *child, const char *node) +{ + libxl__json_object *args = NULL; + + qmp_parameters_add_string(gc, &args, "parent", parent); + if (child) + qmp_parameters_add_string(gc, &args, "child", child); + if (node) + qmp_parameters_add_string(gc, &args, "node", node); + + return 
qmp_run_command(gc, domid, "x-blockdev-change", args, NULL, NULL); +} + +int libxl__qmp_hmp(libxl__gc *gc, int domid, const char *command_line) +{ + libxl__json_object *args = NULL; + + qmp_parameters_add_string(gc, &args, "command-line", command_line); + + return qmp_run_command(gc, domid, "human-monitor-command", args, + NULL, NULL); +} + int libxl__qmp_initializations(libxl__gc *gc, uint32_t domid, const libxl_domain_config *guest_config) { -- 2.5.0 _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel