02.09.2019 17:35, Max Reitz wrote: > On 31.08.19 11:57, Vladimir Sementsov-Ogievskiy wrote: >> 09.08.2019 19:13, Max Reitz wrote: >>> This includes some permission limiting (for example, we only need to >>> take the RESIZE permission for active commits where the base is smaller >>> than the top). >>> >>> Signed-off-by: Max Reitz <mre...@redhat.com> >>> --- >>> block/mirror.c | 117 ++++++++++++++++++++++++++++++++++++++----------- >>> blockdev.c | 47 +++++++++++++++++--- >>> 2 files changed, 131 insertions(+), 33 deletions(-) >>> >>> diff --git a/block/mirror.c b/block/mirror.c >>> index 54bafdf176..6ddbfb9708 100644 >>> --- a/block/mirror.c >>> +++ b/block/mirror.c >>> @@ -42,6 +42,7 @@ typedef struct MirrorBlockJob { >>> BlockBackend *target; >>> BlockDriverState *mirror_top_bs; >>> BlockDriverState *base; >>> + BlockDriverState *base_overlay; >>> >>> /* The name of the graph node to replace */ >>> char *replaces; >>> @@ -665,8 +666,10 @@ static int mirror_exit_common(Job *job) >>> &error_abort); >>> if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { >>> BlockDriverState *backing = s->is_none_mode ? src : s->base; >>> - if (backing_bs(target_bs) != backing) { >>> - bdrv_set_backing_hd(target_bs, backing, &local_err); >>> + BlockDriverState *unfiltered_target = >>> bdrv_skip_rw_filters(target_bs); >>> + >>> + if (bdrv_filtered_cow_bs(unfiltered_target) != backing) { >>> + bdrv_set_backing_hd(unfiltered_target, backing, &local_err); >>> if (local_err) { >>> error_report_err(local_err); >>> ret = -EPERM; >>> @@ -715,7 +718,7 @@ static int mirror_exit_common(Job *job) >>> * valid. 
>>> */ >>> block_job_remove_all_bdrv(bjob); >>> - bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), >>> &error_abort); >>> + bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, >>> &error_abort); >>> >>> /* We just changed the BDS the job BB refers to (with either or both >>> of the >>> * bdrv_replace_node() calls), so switch the BB back so the cleanup >>> does >>> @@ -812,7 +815,8 @@ static int coroutine_fn >>> mirror_dirty_init(MirrorBlockJob *s) >>> return 0; >>> } >>> >>> - ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, >>> &count); >>> + ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, >>> bytes, >>> + &count); >>> if (ret < 0) { >>> return ret; >>> } >>> @@ -908,7 +912,7 @@ static int coroutine_fn mirror_run(Job *job, Error >>> **errp) >>> } else { >>> s->target_cluster_size = BDRV_SECTOR_SIZE; >>> } >>> - if (backing_filename[0] && !target_bs->backing && >>> + if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) && >>> s->granularity < s->target_cluster_size) { >>> s->buf_size = MAX(s->buf_size, s->target_cluster_size); >>> s->cow_bitmap = bitmap_new(length); >>> @@ -1088,8 +1092,9 @@ static void mirror_complete(Job *job, Error **errp) >>> if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) { >>> int ret; >>> >>> - assert(!target->backing); >>> - ret = bdrv_open_backing_file(target, NULL, "backing", errp); >>> + assert(!bdrv_backing_chain_next(target)); >> >> Preexisting, but seems we may crash here, I don't see where it is checked >> before, to >> return error if there is some backing. And even if we do so, we don't >> prevent appearing >> of target backing during mirror operation. > > The idea is that MIRROR_OPEN_BACKING_CHAIN is set only when using > drive-mirror with mode=existing. In this case, we also set > BDRV_O_NO_BACKING for the target. > > You’re right that a user could add a backing chain to the target during > the operation. 
They really have to make an effort to shoot themselves > in the foot for this because the target must have an auto-generated node > name. > > I suppose the best would be not to open the backing chain if the target > node already has a backing child?
Hmm, but we still should generate an error, as we can't do what was requested. > >>> + ret = bdrv_open_backing_file(bdrv_skip_rw_filters(target), NULL, >>> + "backing", errp); >>> if (ret < 0) { >>> return; >>> } >>> @@ -1531,8 +1536,8 @@ static BlockJob *mirror_start_job( >>> MirrorBlockJob *s; >>> MirrorBDSOpaque *bs_opaque; >>> BlockDriverState *mirror_top_bs; >>> - bool target_graph_mod; >>> bool target_is_backing; >>> + uint64_t target_perms, target_shared_perms; >>> Error *local_err = NULL; >>> int ret; >>> >>> @@ -1551,7 +1556,7 @@ static BlockJob *mirror_start_job( >>> buf_size = DEFAULT_MIRROR_BUF_SIZE; >>> } >>> >>> - if (bs == target) { >>> + if (bdrv_skip_rw_filters(bs) == bdrv_skip_rw_filters(target)) { >>> error_setg(errp, "Can't mirror node into itself"); >>> return NULL; >>> } >>> @@ -1615,15 +1620,50 @@ static BlockJob *mirror_start_job( >>> * In the case of active commit, things look a bit different, though, >>> * because the target is an already populated backing file in active >>> use. >>> * We can allow anything except resize there.*/ >>> + >>> + target_perms = BLK_PERM_WRITE; >>> + target_shared_perms = BLK_PERM_WRITE_UNCHANGED; >>> + >>> target_is_backing = bdrv_chain_contains(bs, target); >>> - target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN); >>> + if (target_is_backing) { >>> + int64_t bs_size, target_size; >> >> <empty after definitions> > > Is that part of any of our guidelines? :-) > > Sure, will add. Not sure. Someone asked me for it on the list in the past, and I have gotten used to it.
> >>> + bs_size = bdrv_getlength(bs); >>> + if (bs_size < 0) { >>> + error_setg_errno(errp, -bs_size, >>> + "Could not inquire top image size"); >>> + goto fail; >>> + } >>> + >>> + target_size = bdrv_getlength(target); >>> + if (target_size < 0) { >>> + error_setg_errno(errp, -target_size, >>> + "Could not inquire base image size"); >>> + goto fail; >>> + } >>> + >>> + if (target_size < bs_size) { >>> + target_perms |= BLK_PERM_RESIZE; >>> + } >>> + >>> + target_shared_perms |= BLK_PERM_CONSISTENT_READ >>> + | BLK_PERM_WRITE >>> + | BLK_PERM_GRAPH_MOD; >>> + } else if (bdrv_chain_contains(bs, bdrv_skip_rw_filters(target))) { >>> + /* >>> + * We may want to allow this in the future, but it would >>> + * require taking some extra care. >>> + */ >>> + error_setg(errp, "Cannot mirror to a filter on top of a node in >>> the " >>> + "source's backing chain"); >>> + goto fail; >>> + } >>> + >>> + if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) { >>> + target_perms |= BLK_PERM_GRAPH_MOD; >>> + } >>> + >>> s->target = blk_new(s->common.job.aio_context, >>> - BLK_PERM_WRITE | BLK_PERM_RESIZE | >>> - (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0), >>> - BLK_PERM_WRITE_UNCHANGED | >>> - (target_is_backing ? 
BLK_PERM_CONSISTENT_READ | >>> - BLK_PERM_WRITE | >>> - BLK_PERM_GRAPH_MOD : 0)); >>> + target_perms, target_shared_perms); >>> ret = blk_insert_bs(s->target, target, errp); >>> if (ret < 0) { >>> goto fail; >>> @@ -1647,6 +1687,7 @@ static BlockJob *mirror_start_job( >>> s->backing_mode = backing_mode; >>> s->copy_mode = copy_mode; >>> s->base = base; >>> + s->base_overlay = bdrv_find_overlay(bs, base); >>> s->granularity = granularity; >>> s->buf_size = ROUND_UP(buf_size, granularity); >>> s->unmap = unmap; >>> @@ -1693,15 +1734,39 @@ static BlockJob *mirror_start_job( >>> /* In commit_active_start() all intermediate nodes disappear, so >>> * any jobs in them must be blocked */ >>> if (target_is_backing) { >>> - BlockDriverState *iter; >>> - for (iter = backing_bs(bs); iter != target; iter = >>> backing_bs(iter)) { >>> - /* XXX BLK_PERM_WRITE needs to be allowed so we don't block >>> - * ourselves at s->base (if writes are blocked for a node, >>> they are >>> - * also blocked for its backing file). The other options would >>> be a >>> - * second filter driver above s->base (== target). */ >>> + BlockDriverState *iter, *filtered_target; >>> + uint64_t iter_shared_perms; >>> + >>> + /* >>> + * The topmost node with >>> + * bdrv_skip_rw_filters(filtered_target) == >>> bdrv_skip_rw_filters(target) >>> + */ >>> + filtered_target = bdrv_filtered_cow_bs(bdrv_find_overlay(bs, >>> target)); >>> + >>> + assert(bdrv_skip_rw_filters(filtered_target) == >>> + bdrv_skip_rw_filters(target)); >>> + >>> + /* >>> + * XXX BLK_PERM_WRITE needs to be allowed so we don't block >>> + * ourselves at s->base (if writes are blocked for a node, they are >>> + * also blocked for its backing file). The other options would be a >>> + * second filter driver above s->base (== target). 
>>> + */ >>> + iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE; >>> + >>> + for (iter = bdrv_filtered_bs(bs); iter != target; >>> + iter = bdrv_filtered_bs(iter)) >>> + { >>> + if (iter == filtered_target) { >>> + /* >>> + * From here on, all nodes are filters on the base. >>> + * This allows us to share BLK_PERM_CONSISTENT_READ. >> >> I'd prefer to add something like: "because we share it on target (see >> target BlockBackend creation >> and corresponding comment above)". > > I’d rather not refer to other comments in case they change… Maybe just > “This allows us to share BLK_PERM_CONSISTENT_READ, as we do on the > target.”? I think if someone is interested, they will scan the file for > what permissions are shared on the target anyway. OK. Yes, I just wanted to stress that we are duplicating the target's behavior here, as that helped me to understand it. > >>> + */ >>> + iter_shared_perms |= BLK_PERM_CONSISTENT_READ; >>> + } >>> + >>> ret = block_job_add_bdrv(&s->common, "intermediate node", >>> iter, 0, >>> - BLK_PERM_WRITE_UNCHANGED | >>> BLK_PERM_WRITE, >>> - errp); >>> + iter_shared_perms, errp); >>> if (ret < 0) { >>> goto fail; >>> } >>> @@ -1737,7 +1802,7 @@ fail: >>> bs_opaque->stop = true; >>> bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, >>> &error_abort); >>> - bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), >>> &error_abort); >>> + bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, >>> &error_abort); >>> >>> bdrv_unref(mirror_top_bs); >>> >>> @@ -1764,7 +1829,7 @@ void mirror_start(const char *job_id, >>> BlockDriverState *bs, >>> return; >>> } >>> is_none_mode = mode == MIRROR_SYNC_MODE_NONE; >>> - base = mode == MIRROR_SYNC_MODE_TOP ?
bdrv_backing_chain_next(bs) : >>> NULL; >>> mirror_start_job(job_id, bs, creation_flags, target, replaces, >>> speed, granularity, buf_size, backing_mode, >>> on_source_error, on_target_error, unmap, NULL, NULL, >>> diff --git a/blockdev.c b/blockdev.c >>> index c540802127..c451f553f7 100644 >> >> >> block/mirror.c is OK for me. Continue with blockdev.c... >> >>> --- a/blockdev.c >>> +++ b/blockdev.c >>> @@ -3851,7 +3851,7 @@ static void blockdev_mirror_common(const char >>> *job_id, BlockDriverState *bs, >>> return; >>> } >>> >>> - if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) { >>> + if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) { >>> sync = MIRROR_SYNC_MODE_FULL; >>> } >>> >>> @@ -3900,7 +3900,7 @@ static void blockdev_mirror_common(const char >>> *job_id, BlockDriverState *bs, >>> >>> void qmp_drive_mirror(DriveMirror *arg, Error **errp) >>> { >>> - BlockDriverState *bs; >>> + BlockDriverState *bs, *unfiltered_bs; >>> BlockDriverState *source, *target_bs; >>> AioContext *aio_context; >>> BlockMirrorBackingMode backing_mode; >>> @@ -3909,6 +3909,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) >>> int flags; >>> int64_t size; >>> const char *format = arg->format; >>> + const char *replaces_node_name = NULL; >>> int ret; >>> >>> bs = qmp_get_root_bs(arg->device, errp); >>> @@ -3921,6 +3922,16 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) >>> return; >>> } >>> >>> + /* >>> + * If the user has not instructed us otherwise, we should let the >>> + * block job run from @bs (thus taking into account all filters on >>> + * it) but replace @unfiltered_bs when it finishes (thus not >>> + * removing those filters). >>> + * (And if there are any explicit filters, we should assume the >>> + * user knows how to use the @replaces option.)
>>> + */ >>> + unfiltered_bs = bdrv_skip_implicit_filters(bs); >>> + >>> aio_context = bdrv_get_aio_context(bs); >>> aio_context_acquire(aio_context); >>> >>> @@ -3934,8 +3945,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) >>> } >>> >>> flags = bs->open_flags | BDRV_O_RDWR; >>> - source = backing_bs(bs); >>> + source = bdrv_filtered_cow_bs(unfiltered_bs); >>> if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) { >> >> >> Hmm, you handle this case a bit differently here and in >> blockdev_mirror_common.. >> Can we handle it only in blockdev_mirror_common, to be consistent with >> qmp_blockdev_mirror? > > What exactly do you mean? The difference between skipping all filters > and just skipping implicit filters? Hm. > > First, the check in blockdev_mirror_common() should actually be > unnecessary. In qmp_{blockdev,drive}_mirror(), we do nearly the same > check anyway (and then force sync=full if there is no backing file). Hmm, I see it only in qmp_drive_mirror, not in _blockdev_ > So > if all three functions did the same check, we wouldn’t need it in > blockdev_mirror_common(). And if it was so, better have one check in _common than two duplicated. > > Second, let’s look at the difference in an example: One where > blockdev_mirror_common() would not decide to enforce mode=full, but > qmp_{blockdev,drive}_mirror() would. > This happens when @bs is an explicit filter over some overlay with a > backing file, e.g.: > > throttle --file--> qcow2 --backing--> raw > > It’s correct to run the mirror job from the throttle node; but @source > should be bdrv_backing_chain_next() so it will point to the raw node. > Currently, it is NULL (because the throttle node does not have a COW child). 
> > But then again, I’ve made qmp_{blockdev,drive}_mirror() throw an error > in such a case: > >>> + if (bdrv_filtered_bs(unfiltered_bs)) { >>> + /* @unfiltered_bs is an explicit filter */ >>> + error_setg(errp, "Cannot perform sync=top mirror through an " >>> + "explicitly added filter node on the source"); >>> + goto out; >>> + } > > So it isn’t really a problem. Still, does the error make sense? Should > we just allow that case by letting source be > bdrv_filtered_cow_bs(bdrv_skip_rw_filters(bs))? Looks good. As I understand, you have the test (40/42) for this case and it works for both the _blockdev_ and the _drive_ version of the command. Of course, they had better behave in the same manner. > > (BTW, I just noticed that @base seems to be pretty much unused in > block/mirror.c. It only really uses @base_overlay now. So I suppose it > makes sense to remove it in v7.) > >>> arg->sync = MIRROR_SYNC_MODE_FULL; >>> } >>> if (arg->sync == MIRROR_SYNC_MODE_NONE) { >>> @@ -3954,6 +3971,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) >>> " named node of the graph"); >>> goto out; >>> } >>> + replaces_node_name = arg->replaces; >>> + } else if (unfiltered_bs != bs) { >>> + replaces_node_name = unfiltered_bs->node_name; >>> } >>> >>> if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) { >>> @@ -3973,6 +3993,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) >>> bdrv_img_create(arg->target, format, >>> NULL, NULL, NULL, size, flags, false, >>> &local_err); >>> } else { >>> + /* Implicit filters should not appear in the filename */ >>> + BlockDriverState *explicit_backing = >>> bdrv_skip_implicit_filters(source); >>> + >>> switch (arg->mode) { >>> case NEW_IMAGE_MODE_EXISTING: >>> break; >>> @@ -3980,8 +4003,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) >>> /* create new image with backing file */ >>> bdrv_refresh_filename(source); >>> bdrv_img_create(arg->target, format, >>> - source->filename, >>> - source->drv->format_name, >>> + explicit_backing->filename,
>>> + explicit_backing->drv->format_name, >>> NULL, size, flags, false, &local_err); >>> break; >>> default: >>> @@ -4017,7 +4040,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) >>> } >>> >>> blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, >>> target_bs, >>> - arg->has_replaces, arg->replaces, arg->sync, >>> + !!replaces_node_name, replaces_node_name, >>> arg->sync, >>> backing_mode, arg->has_speed, arg->speed, >>> arg->has_granularity, arg->granularity, >>> arg->has_buf_size, arg->buf_size, >>> @@ -4053,7 +4076,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char >>> *job_id, >>> bool has_auto_dismiss, bool auto_dismiss, >>> Error **errp) >>> { >>> - BlockDriverState *bs; >>> + BlockDriverState *bs, *unfiltered_bs; >>> BlockDriverState *target_bs; >>> AioContext *aio_context; >>> BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; >>> @@ -4065,6 +4088,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char >>> *job_id, >>> return; >>> } >>> >>> + /* >>> + * Same as in qmp_drive_mirror(): >> >> Then, may be better do it in blockdev_mirror_common ? > > Hm, maybe. Should we decide to let @source be > bdrv_filtered_cow_bs(bdrv_skip_rw_filters(bs)) in qmp_drive_mirror(), I > don’t think we need @unfiltered_bs there to determine @source. > > Max > >>> We want to run the job from @bs, >>> + * but we want to replace @unfiltered_bs on completion. >>> + */ >>> + unfiltered_bs = bdrv_skip_implicit_filters(bs); >>> + if (!has_replaces && unfiltered_bs != bs) { >>> + replaces = unfiltered_bs->node_name; >>> + has_replaces = true; >>> + } >>> + >>> target_bs = bdrv_lookup_bs(target, target, errp); >>> if (!target_bs) { >>> return; >>> >> >> > > -- Best regards, Vladimir