On Tue, 01/13 12:50, John Snow wrote:
>
>
> On 01/13/2015 04:37 AM, Fam Zheng wrote:
> >On Mon, 01/12 11:31, John Snow wrote:
> >>For "dirty-bitmap" sync mode, the block job will iterate through the
> >>given dirty bitmap to decide if a sector needs backup (backup all the
> >>dirty clusters and skip clean ones), just as allocation conditions of
> >>"top" sync mode.
> >>
> >>Signed-off-by: Fam Zheng <f...@redhat.com>
> >>Signed-off-by: John Snow <js...@redhat.com>
> >>---
> >> block.c                   |   5 ++
> >> block/backup.c            | 120 ++++++++++++++++++++++++++++++++++++++--------
> >> block/mirror.c            |   4 ++
> >> blockdev.c                |  14 +++++-
> >> hmp.c                     |   3 +-
> >> include/block/block.h     |   1 +
> >> include/block/block_int.h |   2 +
> >> qapi/block-core.json      |  11 +++--
> >> qmp-commands.hx           |   7 +--
> >> 9 files changed, 137 insertions(+), 30 deletions(-)
> >>
> >>diff --git a/block.c b/block.c
> >>index 3f33b9d..5eb6788 100644
> >>--- a/block.c
> >>+++ b/block.c
> >>@@ -5633,6 +5633,11 @@ static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
> >>     }
> >> }
> >>
> >>+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
> >>+{
> >>+    hbitmap_iter_init(hbi, hbi->hb, offset);
> >
> >What's the reason for this indirection? Can caller just use
> >hbitmap_iter_init?
> >
>
> Essentially it is just a rename of hbitmap_iter_init to make its usage here
> clear, that we are manually advancing the pointer. How we accomplish that
> (hbitmap_iter_init) is an implementation detail.
>
> Yes, I can just call this directly from block/backup, but at the time I was
> less sure of how I would advance the pointer, so I created a wrapper where I
> could change details as needed, if I needed to.
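For anyone skimming the thread, here is a toy, self-contained sketch of what
"manually advancing the pointer" amounts to. Every type, helper and constant
in it (Bitmap, BitmapIter, SECTORS_PER_CLUSTER) is an invented stand-in rather
than QEMU's real hbitmap API; only the shape mirrors hbitmap_iter_init(),
hbitmap_iter_next() and the proposed bdrv_set_dirty_iter() wrapper:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-ins for HBitmap/HBitmapIter; invented for illustration only,
     * real hbitmaps are multi-level and sparse. */
    typedef struct Bitmap {
        uint64_t bits;                  /* one bit per "sector", 64 sectors */
    } Bitmap;

    typedef struct BitmapIter {
        const Bitmap *b;
        int64_t pos;
    } BitmapIter;

    static void bitmap_iter_init(BitmapIter *it, const Bitmap *b, int64_t first)
    {
        it->b = b;
        it->pos = first;
    }

    /* Return the next set bit at or after the current position, or -1. */
    static int64_t bitmap_iter_next(BitmapIter *it)
    {
        while (it->pos < 64) {
            int64_t cur = it->pos++;
            if (it->b->bits & (1ULL << cur)) {
                return cur;
            }
        }
        return -1;
    }

    /* The wrapper under discussion: "reposition the iterator" is just a
     * re-init at a new offset, named so the caller states intent rather
     * than mechanism. */
    static void bitmap_set_iter(BitmapIter *it, int64_t offset)
    {
        bitmap_iter_init(it, it->b, offset);
    }

    #define SECTORS_PER_CLUSTER 4   /* invented; stands in for BACKUP_SECTORS_PER_CLUSTER */

    int main(void)
    {
        Bitmap b = { .bits = 0x0f0fULL };   /* sectors 0-3 and 8-11 dirty */
        BitmapIter it;
        int64_t sector;

        bitmap_iter_init(&it, &b, 0);
        while ((sector = bitmap_iter_next(&it)) != -1) {
            int64_t cluster = sector / SECTORS_PER_CLUSTER;
            printf("copy cluster %ld (found dirty sector %ld)\n",
                   (long)cluster, (long)sector);
            /* Skip the remaining dirty bits inside the cluster we just
             * "copied", as backup_run() does for dirty-bitmap sync when the
             * bitmap granularity does not match the cluster size. */
            bitmap_set_iter(&it, (cluster + 1) * SECTORS_PER_CLUSTER);
        }
        return 0;
    }

Running it copies cluster 0 and cluster 2 exactly once even though eight
sectors are dirty, which is the skip the polyrhythmic branch below relies on.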
OK, either is fine for me.

>
> >>+}
> >>+
> >> int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
> >> {
> >>     return hbitmap_count(bitmap->bitmap);
> >>diff --git a/block/backup.c b/block/backup.c
> >>index 792e655..0626a3e 100644
> >>--- a/block/backup.c
> >>+++ b/block/backup.c
> >>@@ -37,6 +37,8 @@ typedef struct CowRequest {
> >> typedef struct BackupBlockJob {
> >>     BlockJob common;
> >>     BlockDriverState *target;
> >>+    /* bitmap for sync=dirty-bitmap */
> >>+    BdrvDirtyBitmap *sync_bitmap;
> >>     MirrorSyncMode sync_mode;
> >>     RateLimit limit;
> >>     BlockdevOnError on_source_error;
> >>@@ -242,6 +244,31 @@ static void backup_complete(BlockJob *job, void *opaque)
> >>     g_free(data);
> >> }
> >>
> >>+static bool coroutine_fn yield_and_check(BackupBlockJob *job)
> >>+{
> >>+    if (block_job_is_cancelled(&job->common)) {
> >>+        return true;
> >>+    }
> >>+
> >>+    /* we need to yield so that qemu_aio_flush() returns.
> >>+     * (without, VM does not reboot)
> >>+     */
> >>+    if (job->common.speed) {
> >>+        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
> >>+                                                      job->sectors_read);
> >>+        job->sectors_read = 0;
> >>+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
> >>+    } else {
> >>+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
> >>+    }
> >>+
> >>+    if (block_job_is_cancelled(&job->common)) {
> >>+        return true;
> >>+    }
> >>+
> >>+    return false;
> >>+}
> >>+
> >> static void coroutine_fn backup_run(void *opaque)
> >> {
> >>     BackupBlockJob *job = opaque;
> >>@@ -254,13 +281,13 @@ static void coroutine_fn backup_run(void *opaque)
> >>     };
> >>     int64_t start, end;
> >>     int ret = 0;
> >>+    bool error_is_read;
> >>
> >>     QLIST_INIT(&job->inflight_reqs);
> >>     qemu_co_rwlock_init(&job->flush_rwlock);
> >>
> >>     start = 0;
> >>-    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
> >>-                       BACKUP_SECTORS_PER_CLUSTER);
> >>+    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
> >>
> >>     job->bitmap = hbitmap_alloc(end, 0);
> >>
> >>@@ -278,28 +305,45 @@ static void coroutine_fn backup_run(void *opaque)
> >>             qemu_coroutine_yield();
> >>             job->common.busy = true;
> >>         }
> >>+    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
> >>+        /* Dirty Bitmap sync has a slightly different iteration method */
> >>+        HBitmapIter hbi;
> >>+        int64_t sector;
> >>+        int64_t cluster;
> >>+        bool polyrhythmic;
> >>+
> >>+        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
> >>+        /* Does the granularity happen to match our backup cluster size? */
> >>+        polyrhythmic = (bdrv_dirty_bitmap_granularity(bs, job->sync_bitmap) !=
> >>+                        BACKUP_CLUSTER_SIZE);
> >>+
> >>+        /* Find the next dirty /sector/ and copy that /cluster/ */
> >>+        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
> >>+            if (yield_and_check(job)) {
> >>+                goto leave;
> >>+            }
> >>+            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
> >>+
> >>+            do {
> >>+                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
> >>+                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
> >>+                if ((ret < 0) &&
> >>+                    backup_error_action(job, error_is_read, -ret) ==
> >>+                    BLOCK_ERROR_ACTION_REPORT) {
> >>+                    goto leave;
> >>+                }
> >>+            } while (ret < 0);
> >>+
> >>+            /* Advance (or rewind) our iterator if we need to. */
> >>+            if (polyrhythmic) {
> >>+                bdrv_set_dirty_iter(&hbi,
> >>+                                    (cluster + 1) * BACKUP_SECTORS_PER_CLUSTER);
> >>+            }
> >>+        }
> >>     } else {
> >>         /* Both FULL and TOP SYNC_MODE's require copying.. */
> >>         for (; start < end; start++) {
> >>-            bool error_is_read;
> >>-
> >>-            if (block_job_is_cancelled(&job->common)) {
> >>-                break;
> >>-            }
> >>-
> >>-            /* we need to yield so that qemu_aio_flush() returns.
> >>-             * (without, VM does not reboot)
> >>-             */
> >>-            if (job->common.speed) {
> >>-                uint64_t delay_ns = ratelimit_calculate_delay(
> >>-                    &job->limit, job->sectors_read);
> >>-                job->sectors_read = 0;
> >>-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
> >>-            } else {
> >>-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
> >>-            }
> >>-
> >>-            if (block_job_is_cancelled(&job->common)) {
> >>+            if (yield_and_check(job)) {
> >>                 break;
> >>             }
> >>
> >>@@ -351,12 +395,23 @@ static void coroutine_fn backup_run(void *opaque)
> >>         }
> >>     }
> >>
> >>+leave:
> >>     notifier_with_return_remove(&before_write);
> >>
> >>     /* wait until pending backup_do_cow() calls have completed */
> >>     qemu_co_rwlock_wrlock(&job->flush_rwlock);
> >>     qemu_co_rwlock_unlock(&job->flush_rwlock);
> >>
> >>+    if (job->sync_bitmap) {
> >>+        if (ret < 0) {
> >>+            /* Merge the successor back into the parent, delete nothing. */
> >>+            assert(bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL));
> >
> >Why can't reclaim fail here? If we can assert, please move the expression out
> >because it has a side effect.
> >
>
> It shouldn't fail; if it does, something went very wrong. The only chance
> this has to fail is if the sync bitmap has no successor, but we explicitly
> installed one (and explicitly checked that it succeeded) before going into the
> block backup.
>
> I am not sure what other meaningful recovery could happen in this case,
> though we *could* just report an error and continue on, acknowledging that
> the BdrvDirtyBitmap is now compromised and of questionable validity.
>
> Does anyone have an error handling preference here?

I don't, as long as you move it out of the assert:

    int r = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
    assert(r);

Fam
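To spell out why the side effect matters: in builds where NDEBUG is defined,
assert() expands to nothing, so an expression placed inside it is never
evaluated at all. A generic, self-contained C illustration (reclaim_something()
is a made-up stand-in; bdrv_reclaim_dirty_bitmap() plays that role in the
patch):

    #include <assert.h>
    #include <stdio.h>

    /* Made-up stand-in for a call that both does work and reports success. */
    static int reclaim_something(void)
    {
        puts("reclaim happens here");
        return 1;
    }

    int main(void)
    {
        /* Risky: if this file is compiled with -DNDEBUG, the whole call,
         * side effect included, disappears. */
        /* assert(reclaim_something()); */

        /* Preferred: the call always runs; only the check may be compiled out. */
        int ok = reclaim_something();
        assert(ok);
        return 0;
    }

Hoisting the call onto its own line guarantees the reclaim happens in every
build; only the check itself can be compiled away.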