When formatting a qcow2 image, we only need the WRITE permission on the
data-file child to preallocate data, so for metadata-only preallocation
(or none at all), we can suppress that permission via the BDRV_O_NO_IO
flag.  That promises to actually not do any I/O at all, but writing is
actually the only thing we would do, so it applies.  (BDRV_O_NO_IO does
not preclude reading/writing from/to the metadata file.)

Similarly, we will only resize the data-file if it is currently smaller
than the supposed virtual disk size; so it is already big enough, we can
suppress the RESIZE permission by removing the BDRV_O_RESIZE flag.

This commit allows creating a qcow2 image with an existing raw image as
its external data file while that raw image is in use by the VM.

Signed-off-by: Hanna Czenczek <[email protected]>
---
 block/qcow2.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 64 insertions(+), 5 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index dd0f47c0ff..00958a0552 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3641,6 +3641,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, 
Error **errp)
     size_t cluster_size;
     int version;
     int refcount_order;
+    int blk_flags;
     uint64_t *refcount_table;
     int ret;
     uint8_t compression_type = QCOW2_COMPRESSION_TYPE_ZLIB;
@@ -3908,20 +3909,48 @@ qcow2_co_create(BlockdevCreateOptions *create_options, 
Error **errp)
      * table)
      */
     options = qdict_new();
+    blk_flags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
     qdict_put_str(options, "driver", "qcow2");
     qdict_put_str(options, "file", bs->node_name);
     if (data_bs) {
         qdict_put_str(options, "data-file", data_bs->node_name);
+
+        /*
+         * If possible, suppress all permissions we can.  We must keep the
+         * BDRV_O_RDWR flag because the metadata child must still be written,
+         * but we can add BDRV_O_NO_IO if we know that the data file child will
+         * not receive any I/O, to suppress taking the WRITE permission on it.
+         * We can only do so as long as none of the operations on `blk` will
+         * do I/O on the data file.  Such I/O accesses can only happen during
+         * resize (which grows the image from length 0 to qcow2_opts->size) 
with
+         * data preallocation.  So as long as no data preallocation has been
+         * requested, BDRV_O_NO_IO will work.
+         */
+        if (qcow2_opts->preallocation == PREALLOC_MODE_METADATA ||
+            qcow2_opts->preallocation == PREALLOC_MODE_OFF) {
+            blk_flags |= BDRV_O_NO_IO;
+        }
+
+        /*
+         * Similarly for BDRV_O_RESIZE: Suppressing it means we will not take
+         * the RESIZE permission.  The data-file child is only grown if too
+         * small, never shrunk; so if it already is big enough, no need for
+         * BDRV_O_RESIZE.
+         */
+        bdrv_graph_co_rdlock();
+        if (bdrv_co_getlength(data_bs) >= (int64_t)qcow2_opts->size) {
+            blk_flags &= ~BDRV_O_RESIZE;
+        }
+        bdrv_graph_co_rdunlock();
     }
-    blk = blk_co_new_open(NULL, NULL, options,
-                          BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH,
-                          errp);
+    blk = blk_co_new_open(NULL, NULL, options, blk_flags, errp);
     if (blk == NULL) {
         ret = -EIO;
         goto out;
     }
 
     bdrv_graph_co_rdlock();
+    /* BDRV_O_NO_IO note: No data-file I/O */
     ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
     if (ret < 0) {
         bdrv_graph_co_rdunlock();
@@ -3940,7 +3969,10 @@ qcow2_co_create(BlockdevCreateOptions *create_options, 
Error **errp)
         s->image_data_file = g_strdup(data_bs->filename);
     }
 
-    /* Create a full header (including things like feature table) */
+    /*
+     * Create a full header (including things like feature table).
+     * BDRV_O_NO_IO note: No data-file I/O
+     */
     ret = qcow2_update_header(blk_bs(blk));
     bdrv_graph_co_rdunlock();
 
@@ -3949,7 +3981,13 @@ qcow2_co_create(BlockdevCreateOptions *create_options, 
Error **errp)
         goto out;
     }
 
-    /* Okay, now that we have a valid image, let's give it the right size */
+    /*
+     * Okay, now that we have a valid image, let's give it the right size.
+     * BDRV_O_NO_IO note: This will only read/write from/to data-file if data
+     * preallocation has been requested.
+     * BDRV_O_RESIZE note: We pass @exact = false, so the data-file is only
+     * resized if it is smaller than qcow2_opts->size.
+     */
     bdrv_graph_co_rdlock();
     ret = qcow2_co_truncate(blk_bs(blk), qcow2_opts->size, false,
                             qcow2_opts->preallocation, 0, errp);
@@ -3968,6 +4006,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, 
Error **errp)
         }
 
         bdrv_graph_co_rdlock();
+        /* BDRV_O_NO_IO note: No data-file I/O */
         ret = bdrv_co_change_backing_file(blk_bs(blk), 
qcow2_opts->backing_file,
                                           backing_format, false);
         bdrv_graph_co_rdunlock();
@@ -3983,6 +4022,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, 
Error **errp)
     /* Want encryption? There you go. */
     if (qcow2_opts->encrypt) {
         bdrv_graph_co_rdlock();
+        /* BDRV_O_NO_IO note: No data-file I/O */
         ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp);
         bdrv_graph_co_rdunlock();
 
@@ -4481,6 +4521,15 @@ fail:
     return ret;
 }
 
+/**
+ * Resize the qcow2 image.
+ * To support BDRV_O_NO_IO and !BDRV_O_RESIZE from qcow2_co_create(), this
+ * function must:
+ * - If @exact is false, resize an external data file only if its size is less
+ *   than @offset
+ * - Only access (write to) an external data file if @prealloc prescribes data
+ *   preallocation (FALLOC/FULL).
+ */
 static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
                   PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
@@ -4634,6 +4683,11 @@ qcow2_co_truncate(BlockDriverState *bs, int64_t offset, 
bool exact,
         break;
 
     case PREALLOC_MODE_METADATA:
+        /*
+         * Note for BDRV_O_NO_IO and !BDRV_O_RESIZE: This will not do I/O on an
+         * external data file, and will only resize it if its current length is
+         * less than `offset`.
+         */
         ret = preallocate_co(bs, old_length, offset, prealloc, errp);
         if (ret < 0) {
             goto fail;
@@ -4652,6 +4706,11 @@ qcow2_co_truncate(BlockDriverState *bs, int64_t offset, 
bool exact,
         /* With a data file, preallocation means just allocating the metadata
          * and forwarding the truncate request to the data file */
         if (has_data_file(bs)) {
+            /*
+             * Note for BDRV_O_NO_IO and !BDRV_O_RESIZE: This *will* write data
+             * to an external data file, but only resize it if its current
+             * length is less than `offset`.
+             */
             ret = preallocate_co(bs, old_length, offset, prealloc, errp);
             if (ret < 0) {
                 goto fail;
-- 
2.53.0


Reply via email to