[Qemu-devel] [RFC PATCH] drive-backup 'stream' mode

Wolfgang Richter Fri, 11 Oct 2013 08:34:26 -0700

Idea: Introduce a mode for drive-backup that duplicates every guest
write to another target instead of doing copy-on-write (CoW).  It is
useful for introspection (my use case) and for keeping a remote block
device in sync with writes (which helps with migration or backup).




Issue with current modes:  All of the current modes are well-designed
to support point-in-time snapshots, but none of them handle keeping
another drive up-to-date as new writes continuously occur.  The 'None'
mode documentation is a bit ambiguous in this regard, but what it
actually implements is a very low overhead CoW snapshot.



Patch: Fixes ambiguity in the 'None' mode documentation, introduces a
new mode 'stream' which duplicates writes without reading any data
from the original disk.

I put the logic for copying the write into a new coroutine called
'backup_do_stream', as it needs almost nothing from the original
'backup_do_cow' function (no bitmap, no reads from a block device,
etc.).  The other major change is that tracked requests now also carry
a pointer to the QEMUIOVector (qiov) of the write, which is passed along.

This is based off of v1.6.0 code.







diff --git a/block.c b/block.c
index 01b66d8..159f825 100644
--- a/block.c
+++ b/block.c
@@ -1872,12 +1872,14 @@ static void tracked_request_end(BdrvTrackedRequest *req)
 static void tracked_request_begin(BdrvTrackedRequest *req,
                                   BlockDriverState *bs,
                                   int64_t sector_num,
-                                  int nb_sectors, bool is_write)
+                                  int nb_sectors, bool is_write,
+                                  QEMUIOVector *qiov)
 {
     *req = (BdrvTrackedRequest){
         .bs = bs,
         .sector_num = sector_num,
         .nb_sectors = nb_sectors,
+        .qiov = qiov,
         .is_write = is_write,
         .co = qemu_coroutine_self(),
     };
@@ -2528,7 +2530,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
     }

-    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
+    tracked_request_begin(&req, bs, sector_num, nb_sectors, false, NULL);

     if (flags & BDRV_REQ_COPY_ON_READ) {
         int pnum;
@@ -2634,7 +2636,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
         wait_for_overlapping_requests(bs, sector_num, nb_sectors);
     }

-    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
+    tracked_request_begin(&req, bs, sector_num, nb_sectors, true, qiov);

     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);

diff --git a/block/backup.c b/block/backup.c
index 6ae8a05..686a53f 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -84,6 +84,37 @@ static void cow_request_end(CowRequest *req)
     qemu_co_queue_restart_all(&req->wait_queue);
 }

+/* Stream mode: duplicate one guest write (qiov) onto job->target without
+ * reading the source disk.  Returns the result of bdrv_co_writev(). */
+static int coroutine_fn backup_do_stream(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *qiov)
+{
+    BackupBlockJob *job = (BackupBlockJob *)bs->job;
+    CowRequest cow_request;
+    int ret = 0;
+    int64_t start = sector_num, end = sector_num + nb_sectors;
+
+    qemu_co_rwlock_rdlock(&job->flush_rwlock);
+
+    /* Wait for overlapping requests on [start, end), then track this one. */
+    wait_for_overlapping_requests(job, start, end);
+    cow_request_begin(&cow_request, job, start, end);
+
+    ret = bdrv_co_writev(job->target, sector_num, nb_sectors, qiov);
+
+    /* Publish progress, guest I/O counts as progress too.  Progress grows
+     * by the size of this write (nb_sectors), not by its start position.
+     * The offset field is an opaque progress value, not a disk offset. */
+    job->sectors_read += nb_sectors;
+    job->common.offset += nb_sectors * BDRV_SECTOR_SIZE;
+
+    cow_request_end(&cow_request);
+    qemu_co_rwlock_unlock(&job->flush_rwlock);
+
+    return ret;
+}
+
 static int coroutine_fn backup_do_cow(BlockDriverState *bs,
                                       int64_t sector_num, int nb_sectors,
                                       bool *error_is_read)
@@ -181,7 +212,12 @@ static int coroutine_fn backup_before_write_notify(
 {
     BdrvTrackedRequest *req = opaque;

-    return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL);
+    BackupBlockJob *job = (BackupBlockJob *)req->bs->job; /* per-job mode */
+    if (job->sync_mode != MIRROR_SYNC_MODE_STREAM) {
+        return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL);
+    }
+    return backup_do_stream(req->bs, req->sector_num, req->nb_sectors,
+                            req->qiov);
 }

 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -248,7 +284,8 @@ static void coroutine_fn backup_run(void *opaque)

     bdrv_add_before_write_notifier(bs, &before_write);

-    if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
+    if (job->sync_mode == MIRROR_SYNC_MODE_NONE ||
+        job->sync_mode == MIRROR_SYNC_MODE_STREAM) {
         while (!block_job_is_cancelled(&job->common)) {
             /* Yield until the job is cancelled.  We just let our before_write
              * notify callback service CoW requests. */
diff --git a/include/block/block_int.h b/include/block/block_int.h
index e45f2a0..13ab769 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -63,6 +63,7 @@ typedef struct BdrvTrackedRequest {
     BlockDriverState *bs;
     int64_t sector_num;
     int nb_sectors;
+    QEMUIOVector *qiov;
     bool is_write;
     QLIST_ENTRY(BdrvTrackedRequest) list;
     Coroutine *co; /* owner, used for deadlock detection */
diff --git a/qapi-schema.json b/qapi-schema.json
index a51f7d2..9a3008a 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1311,12 +1311,14 @@
 #
 # @full: copies data from all images to the destination
 #
-# @none: only copy data written from now on
+# @none: copy-on-write only: copy data that is overwritten from now on
+#
+# @stream: copy every new write to target
 #
 # Since: 1.3
 ##
 { 'enum': 'MirrorSyncMode',
-  'data': ['top', 'full', 'none'] }
+  'data': ['top', 'full', 'none', 'stream'] }

 ##
 # @BlockJobInfo:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index cf47e3f..39056bd 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -944,7 +944,8 @@ Arguments:
             (json-string, optional)
 - "sync": what parts of the disk image should be copied to the destination;
   possibilities include "full" for all the disk, "top" for only the sectors
-  allocated in the topmost image, or "none" to only replicate new I/O
+  allocated in the topmost image, "none" to CoW on new I/O, or "stream"
+  to send every new write to the target
   (MirrorSyncMode).
 - "mode": whether and how QEMU should create a new image
           (NewImageMode, optional, default 'absolute-paths')

[Qemu-devel] [RFC PATCH] drive-backup 'stream' mode

Reply via email to