When formatting a qcow2 image, we only need the WRITE permission on the data-file child to preallocate data, so for metadata-only preallocation (or none at all), we can suppress that permission via the BDRV_O_NO_IO flag. That promises to actually not do any I/O at all, but writing is actually the only thing we would do, so it applies. (BDRV_O_NO_IO does not preclude reading/writing from/to the metadata file.)
Similarly, we will only resize the data-file if it is currently smaller than the supposed virtual disk size; so it is already big enough, we can suppress the RESIZE permission by removing the BDRV_O_RESIZE flag. This commit allows creating a qcow2 image with an existing raw image as its external data file while that raw image is in use by the VM. Signed-off-by: Hanna Czenczek <[email protected]> --- block/qcow2.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index dd0f47c0ff..00958a0552 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3641,6 +3641,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) size_t cluster_size; int version; int refcount_order; + int blk_flags; uint64_t *refcount_table; int ret; uint8_t compression_type = QCOW2_COMPRESSION_TYPE_ZLIB; @@ -3908,20 +3909,48 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) * table) */ options = qdict_new(); + blk_flags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH; qdict_put_str(options, "driver", "qcow2"); qdict_put_str(options, "file", bs->node_name); if (data_bs) { qdict_put_str(options, "data-file", data_bs->node_name); + + /* + * If possible, suppress all permissions we can. We must keep the + * BDRV_O_RDWR flag because the metadata child must still be written, + * but we can add BDRV_O_NO_IO if we know that the data file child will + * not receive any I/O, to suppress taking the WRITE permission on it. + * We can only do so as long as none of the operations on `blk` will + * do I/O on the data file. Such I/O accesses can only happen during + * resize (which grows the image from length 0 to qcow2_opts->size) with + * data preallocation. So as long as no data preallocation has been + * requested, BDRV_O_NO_IO will work. + */ + if (qcow2_opts->preallocation == PREALLOC_MODE_METADATA || + qcow2_opts->preallocation == PREALLOC_MODE_OFF) { + blk_flags |= BDRV_O_NO_IO; + } + + /* + * Similarly for BDRV_O_RESIZE: Suppressing it means we will not take + * the RESIZE permission. The data-file child is only grown if too + * small, never shrunk; so if it already is big enough, no need for + * BDRV_O_RESIZE. + */ + bdrv_graph_co_rdlock(); + if (bdrv_co_getlength(data_bs) >= (int64_t)qcow2_opts->size) { + blk_flags &= ~BDRV_O_RESIZE; + } + bdrv_graph_co_rdunlock(); } - blk = blk_co_new_open(NULL, NULL, options, - BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, - errp); + blk = blk_co_new_open(NULL, NULL, options, blk_flags, errp); if (blk == NULL) { ret = -EIO; goto out; } bdrv_graph_co_rdlock(); + /* BDRV_O_NO_IO note: No data-file I/O */ ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); if (ret < 0) { bdrv_graph_co_rdunlock(); @@ -3940,7 +3969,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) s->image_data_file = g_strdup(data_bs->filename); } - /* Create a full header (including things like feature table) */ + /* + * Create a full header (including things like feature table). + * BDRV_O_NO_IO note: No data-file I/O + */ ret = qcow2_update_header(blk_bs(blk)); bdrv_graph_co_rdunlock(); @@ -3949,7 +3981,13 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) goto out; } - /* Okay, now that we have a valid image, let's give it the right size */ + /* + * Okay, now that we have a valid image, let's give it the right size. + * BDRV_O_NO_IO note: This will only read/write from/to data-file if data + * preallocation has been requested. + * BDRV_O_RESIZE note: We pass @exact = false, so the data-file is only + * resized if it is smaller than qcow2_opts->size. + */ bdrv_graph_co_rdlock(); ret = qcow2_co_truncate(blk_bs(blk), qcow2_opts->size, false, qcow2_opts->preallocation, 0, errp); @@ -3968,6 +4006,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) } bdrv_graph_co_rdlock(); + /* BDRV_O_NO_IO note: No data-file I/O */ ret = bdrv_co_change_backing_file(blk_bs(blk), qcow2_opts->backing_file, backing_format, false); bdrv_graph_co_rdunlock(); @@ -3983,6 +4022,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) /* Want encryption? There you go. */ if (qcow2_opts->encrypt) { bdrv_graph_co_rdlock(); + /* BDRV_O_NO_IO note: No data-file I/O */ ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp); bdrv_graph_co_rdunlock(); @@ -4481,6 +4521,15 @@ fail: return ret; } +/** + * Resize the qcow2 image. + * To support BDRV_O_NO_IO and !BDRV_O_RESIZE from qcow2_co_create(), this + * function must: + * - If @exact is false, resize an external data file only if its size is less + * than @offset + * - Only access (write to) an external data file if @prealloc prescribes data + * preallocation (FALLOC/FULL). + */ static int coroutine_fn GRAPH_RDLOCK qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp) @@ -4634,6 +4683,11 @@ qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, break; case PREALLOC_MODE_METADATA: + /* + * Note for BDRV_O_NO_IO and !BDRV_O_RESIZE: This will not do I/O on an + * external data file, and will only resize it if its current length is + * less than `offset`. + */ ret = preallocate_co(bs, old_length, offset, prealloc, errp); if (ret < 0) { goto fail; @@ -4652,6 +4706,11 @@ qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact, /* With a data file, preallocation means just allocating the metadata * and forwarding the truncate request to the data file */ if (has_data_file(bs)) { + /* + * Note for BDRV_O_NO_IO and !BDRV_O_RESIZE: This *will* write data + * to an external data file, but only resize it if its current + * length is less than `offset`. + */ ret = preallocate_co(bs, old_length, offset, prealloc, errp); if (ret < 0) { goto fail; -- 2.53.0
