node-name gives a name to the created BDS and registers it in the node graph.
to-replace-node-name can be used when drive-mirror is called with sync=full. The purpose of these fields is to be able to reconstruct and replace a broken quorum file. drive-mirror will bdrv_swap the new BDS named node-name with the one pointed by to-replace-node-name when the mirroring is finished. Signed-off-by: Benoit Canet <ben...@irqsave.net> --- block.c | 27 +++++++++++++++++++++ block/mirror.c | 62 +++++++++++++++++++++++++++++++++++++---------- blockdev.c | 51 +++++++++++++++++++++++++++++++++++--- hmp.c | 1 + include/block/block.h | 5 ++++ include/block/block_int.h | 4 ++- qapi/block-core.json | 9 +++++++ qmp-commands.hx | 5 ++++ 8 files changed, 147 insertions(+), 17 deletions(-) diff --git a/block.c b/block.c index 17f763d..5c03c2b 100644 --- a/block.c +++ b/block.c @@ -5795,3 +5795,30 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate) return false; } + +BlockDriverState *check_to_replace_node(const char *to_replace_node_name, + Error **errp) +{ + BlockDriverState *to_replace_bs = bdrv_find_node(to_replace_node_name); + if (!to_replace_bs) { + error_setg(errp, "to-replace-node-name=%s not found", + to_replace_node_name); + return NULL; + } + + /* the code should only be able to replace the top first non filter + * node of the graph. For example the first BDS under a quorum. + */ + if (!bdrv_is_first_non_filter(to_replace_bs)) { + error_set(errp, QERR_FEATURE_DISABLED, + "drive-mirror and replace node-name"); + return NULL; + } + + if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { + return NULL; + } + + return to_replace_bs; +} + diff --git a/block/mirror.c b/block/mirror.c index 94c8661..68526ae 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -32,6 +32,12 @@ typedef struct MirrorBlockJob { RateLimit limit; BlockDriverState *target; BlockDriverState *base; + /* The name of the graph node to replace */ + char *to_replace_node_name; + /* The block BDS to replace */ + BlockDriverState *to_replace; + /* Used to block operations on the drive-mirror-replace target */ + Error *replace_blocker; bool is_none_mode; BlockdevOnError on_source_error, on_target_error; bool synced; @@ -490,10 +496,14 @@ immediate_exit: bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { - if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { - bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); + BlockDriverState *to_replace = s->common.bs; + if (s->to_replace) { + to_replace = s->to_replace; } - bdrv_swap(s->target, s->common.bs); + if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { + bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); + } + bdrv_swap(s->target, to_replace); if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) { /* drop the bs loop chain formed by the swap: break the loop then * trigger the unref from the top one */ @@ -502,6 +512,11 @@ immediate_exit: bdrv_unref(p); } } + if (s->to_replace) { + bdrv_op_unblock_all(s->to_replace, s->replace_blocker); + error_free(s->replace_blocker); + bdrv_unref(s->to_replace); + } bdrv_unref(s->target); block_job_completed(&s->common, ret); } @@ -540,6 +555,23 @@ static void mirror_complete(BlockJob *job, Error **errp) return; } + /* check the target bs is not block and block all operations on it */ + if (s->to_replace_node_name) { + s->to_replace = check_to_replace_node(s->to_replace_node_name, errp); + + if (!s->to_replace) { + return; + } + + error_setg(&s->replace_blocker, + "block device is in use by block-job-complete"); + bdrv_op_block_all(s->to_replace, s->replace_blocker); + bdrv_ref(s->to_replace); + + g_free(s->to_replace_node_name); + s->to_replace_node_name = NULL; + } + s->should_complete = true; block_job_resume(job); } @@ -562,14 +594,15 @@ static const BlockJobDriver commit_active_job_driver = { }; static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, - int64_t speed, int64_t granularity, - int64_t buf_size, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - BlockDriverCompletionFunc *cb, - void *opaque, Error **errp, - const BlockJobDriver *driver, - bool is_none_mode, BlockDriverState *base) + const char *to_replace_node_name, + int64_t speed, int64_t granularity, + int64_t buf_size, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp, + const BlockJobDriver *driver, + bool is_none_mode, BlockDriverState *base) { MirrorBlockJob *s; @@ -600,6 +633,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, return; } + s->to_replace_node_name = g_strdup(to_replace_node_name); s->on_source_error = on_source_error; s->on_target_error = on_target_error; s->target = target; @@ -621,6 +655,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, } void mirror_start(BlockDriverState *bs, BlockDriverState *target, + const char *to_replace_node_name, int64_t speed, int64_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, @@ -632,7 +667,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, is_none_mode = mode == MIRROR_SYNC_MODE_NONE; base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL; - mirror_start_job(bs, target, speed, granularity, buf_size, + mirror_start_job(bs, target, to_replace_node_name, + speed, granularity, buf_size, on_source_error, on_target_error, cb, opaque, errp, &mirror_job_driver, is_none_mode, base); } @@ -680,7 +716,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, } bdrv_ref(base); - mirror_start_job(bs, base, speed, 0, 0, + mirror_start_job(bs, base, NULL, speed, 0, 0, on_error, on_error, cb, opaque, &local_err, &commit_active_job_driver, false, base); if (local_err) { diff --git a/blockdev.c b/blockdev.c index 4cbcc56..bccc2f6 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2106,6 +2106,9 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) void qmp_drive_mirror(const char *device, const char *target, bool has_format, const char *format, + bool has_new_node_name, const char *new_node_name, + bool has_to_replace_node_name, + const char *to_replace_node_name, enum MirrorSyncMode sync, bool has_mode, enum NewImageMode mode, bool has_speed, int64_t speed, @@ -2119,6 +2122,7 @@ void qmp_drive_mirror(const char *device, const char *target, BlockDriverState *source, *target_bs; BlockDriver *drv = NULL; Error *local_err = NULL; + QDict *options = NULL; int flags; int64_t size; int ret; @@ -2192,6 +2196,28 @@ void qmp_drive_mirror(const char *device, const char *target, return; } + if (has_to_replace_node_name) { + BlockDriverState *to_replace_bs; + + if (!has_new_node_name) { + error_setg(errp, "a new-node-name must be provided when replacing a" + " named node of the graph"); + return; + } + + to_replace_bs = check_to_replace_node(to_replace_node_name, errp); + + if (!to_replace_bs) { + return; + } + + if (size < bdrv_getlength(to_replace_bs)) { + error_setg(errp, "cannot replace to-replace-node-name image with a " + "mirror image that would be smaller in size"); + return; + } + } + if ((sync == MIRROR_SYNC_MODE_FULL || !source) && mode != NEW_IMAGE_MODE_EXISTING) { @@ -2220,18 +2246,37 @@ void qmp_drive_mirror(const char *device, const char *target, return; } + /* if we are planning to replace a graph node name the code should do a full + * mirror of the source image + */ + if (has_to_replace_node_name && sync != MIRROR_SYNC_MODE_FULL) { + error_setg(errp, + "to-replace-node-name can only be used with sync=full"); + return; + } + + if (has_new_node_name) { + options = qdict_new(); + qdict_put(options, "node-name", qstring_from_str(new_node_name)); + } + /* Mirroring takes care of copy-on-write using the source's backing * file. */ target_bs = NULL; - ret = bdrv_open(&target_bs, target, NULL, NULL, flags | BDRV_O_NO_BACKING, - drv, &local_err); + ret = bdrv_open(&target_bs, target, NULL, options, + flags | BDRV_O_NO_BACKING, drv, &local_err); if (ret < 0) { error_propagate(errp, local_err); return; } - mirror_start(bs, target_bs, speed, granularity, buf_size, sync, + /* pass the node name to replace to mirror start since it's loose coupling + * and will allow to check whether the node still exist at mirror completion + */ + mirror_start(bs, target_bs, + has_to_replace_node_name ? to_replace_node_name : NULL, + speed, granularity, buf_size, sync, on_source_error, on_target_error, block_job_cb, bs, &local_err); if (local_err != NULL) { diff --git a/hmp.c b/hmp.c index ccc35d4..9a4b8da 100644 --- a/hmp.c +++ b/hmp.c @@ -930,6 +930,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) } qmp_drive_mirror(device, filename, !!format, format, + false, NULL, false, NULL, full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, true, mode, false, 0, false, 0, false, 0, false, 0, false, 0, &err); diff --git a/include/block/block.h b/include/block/block.h index 7d86e29..96afd01 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -179,6 +179,7 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_MIRROR, BLOCK_OP_TYPE_RESIZE, BLOCK_OP_TYPE_STREAM, + BLOCK_OP_TYPE_REPLACE, BLOCK_OP_TYPE_MAX, } BlockOpType; @@ -319,6 +320,10 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, BlockDriverState *candidate); bool bdrv_is_first_non_filter(BlockDriverState *candidate); +/* check if a named node can be replaced when doing drive-mirror */ +BlockDriverState *check_to_replace_node(const char *to_replace_node_name, + Error **errp); + /* async block I/O */ typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, int sector_num); diff --git a/include/block/block_int.h b/include/block/block_int.h index 8d58334..ce65ed8 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -86,7 +86,6 @@ struct BlockDriver { */ bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs, BlockDriverState *candidate); - int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); int (*bdrv_probe_device)(const char *filename); @@ -492,6 +491,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, * mirror_start: * @bs: Block device to operate on. * @target: Block device to write to. + * @to_replace_node_name: Block graph node name to replace once the mirror is + * done. Can only be used when full mirroring is selected. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @granularity: The chosen granularity for the dirty bitmap. * @buf_size: The amount of data that can be in flight at one time. @@ -508,6 +509,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, * @bs will be switched to read from @target. */ void mirror_start(BlockDriverState *bs, BlockDriverState *target, + const char *to_replace_node_name, int64_t speed, int64_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, diff --git a/qapi/block-core.json b/qapi/block-core.json index eec881b..23dcecd 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -769,6 +769,14 @@ # @format: #optional the format of the new destination, default is to # probe if @mode is 'existing', else the format of the source # +# @new-node-name: #optional the new block driver state node name in the graph +# (Since 2.1) +# +# @to-replace-node-name: #optional with sync=full graph node name to be +# replaced by the new image when a whole image copy is +# done. This can be used to repair broken Quorum files. +# (Since 2.1) +# # @mode: #optional whether and how QEMU should create a new image, default is # 'absolute-paths'. # @@ -801,6 +809,7 @@ ## { 'command': 'drive-mirror', 'data': { 'device': 'str', 'target': 'str', '*format': 'str', + '*new-node-name': 'str', '*to-replace-node-name': 'str', 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', diff --git a/qmp-commands.hx b/qmp-commands.hx index d8aa4ed..bae74f5 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1293,6 +1293,7 @@ EQMP { .name = "drive-mirror", .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," + "new-node-name:s?,to-replace-node-name:s?," "on-source-error:s?,on-target-error:s?," "granularity:i?,buf-size:i?", .mhandler.cmd_new = qmp_marshal_input_drive_mirror, @@ -1314,6 +1315,10 @@ Arguments: - "device": device name to operate on (json-string) - "target": name of new image file (json-string) - "format": format of new image (json-string, optional) +- "new-node-name": the name of the new block driver state in the node graph + (json-string, optional) +- "to-replace-node-name": the block driver node name to replace when finished + (json-string, optional) - "mode": how an image file should be created into the target file/device (NewImageMode, optional, default 'absolute-paths') - "speed": maximum speed of the streaming job, in bytes per second -- 1.9.1