+}
+
+static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+ uint32_t additional_flags)
+{
+ BlockDriverState *bs = dbms->bs;
+ BdrvDirtyBitmap *bitmap = dbms->bitmap;
+ uint32_t flags = additional_flags;
+ trace_send_bitmap_header_enter();
+
+ if (bs != dirty_bitmap_mig_state.prev_bs) {
+ dirty_bitmap_mig_state.prev_bs = bs;
+ flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME;
+ }
+
+ if (bitmap != dirty_bitmap_mig_state.prev_bitmap) {
+ dirty_bitmap_mig_state.prev_bitmap = bitmap;
+ flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME;
+ }
+
+ qemu_put_bitmap_flags(f, flags);
+
+ if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+ qemu_put_counted_string(f, dbms->node_name);
+ }
+
+ if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+ qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap));
+ }
+}
+
+static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START);
+ qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap));
+ qemu_put_byte(f, bdrv_dirty_bitmap_enabled(dbms->bitmap));
+}
+
+static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE);
+}
+
+static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms,
+ uint64_t start_sector, uint32_t nr_sectors)
+{
+ /* align for buffer_is_zero() */
+ uint64_t align = 4 * sizeof(long);
+ uint64_t unaligned_size =
+ bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
+ start_sector, nr_sectors);
+ uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
+ uint8_t *buf = g_malloc0(buf_size);
+ uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
+
+ bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
+ start_sector, nr_sectors);
+
+ if (buffer_is_zero(buf, buf_size)) {
+ g_free(buf);
+ buf = NULL;
+ flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
+ }
+
+ trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
+
+ send_bitmap_header(f, dbms, flags);
+
+ qemu_put_be64(f, start_sector);
+ qemu_put_be32(f, nr_sectors);
+
+ /* if a block is zero we need to flush here since the network
+ * bandwidth is now a lot higher than the storage device bandwidth.
+ * thus if we queue zero blocks we slow down the migration. */
+ if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+ qemu_fflush(f);
+ } else {
+ qemu_put_be64(f, buf_size);
+ qemu_put_buffer(f, buf, buf_size);
+ }
+
+ g_free(buf);
+}
+
+
+/* Called with iothread lock taken. */
+
+static void init_dirty_bitmap_migration(void)
+{
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+ DirtyBitmapMigBitmapState *dbms;
+ BdrvNextIterator it;
+ uint64_t total_bytes = 0;
+
+ dirty_bitmap_mig_state.bulk_completed = false;
+ dirty_bitmap_mig_state.prev_bs = NULL;
+ dirty_bitmap_mig_state.prev_bitmap = NULL;
+
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ for (bitmap = bdrv_next_dirty_bitmap(bs, NULL); bitmap;
+ bitmap = bdrv_next_dirty_bitmap(bs, bitmap)) {
+ if (!bdrv_dirty_bitmap_name(bitmap)) {
+ continue;
+ }
+
+ if (!bdrv_get_device_or_node_name(bs)) {
+ /* not named non-root node */
+ continue;
+ }
+
+ dbms = g_new0(DirtyBitmapMigBitmapState, 1);
+ dbms->bs = bs;
+ dbms->node_name = bdrv_get_node_name(bs);
+ if (!dbms->node_name || dbms->node_name[0] == '\0') {
+ dbms->node_name = bdrv_get_device_name(bs);
+ }
+ dbms->bitmap = bitmap;
+ dbms->total_sectors = bdrv_nb_sectors(bs);
+ dbms->sectors_per_chunk = CHUNK_SIZE * 8 *
+ bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+
+ total_bytes +=
+ bdrv_dirty_bitmap_serialization_size(bitmap,
+ 0, dbms->total_sectors);
+
+ QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list,
+ dbms, entry);
+ }
+ }
+}
+
+/* Called with no lock taken. */
+static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms)
+{
+ uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector,
+ dbms->sectors_per_chunk);
+
+ send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors);
+
+ dbms->cur_sector += nr_sectors;
+ if (dbms->cur_sector >= dbms->total_sectors) {
+ dbms->bulk_completed = true;
+ }
+}
+
+/* Called with no lock taken. */
+static void bulk_phase(QEMUFile *f, bool limit)
+{
+ DirtyBitmapMigBitmapState *dbms;
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ while (!dbms->bulk_completed) {
+ bulk_phase_send_chunk(f, dbms);
+ if (limit && qemu_file_rate_limit(f)) {
+ return;
+ }
+ }
+ }
+
+ dirty_bitmap_mig_state.bulk_completed = true;
+}
+
+/* Called with iothread lock taken. */
+static void dirty_bitmap_mig_cleanup(void)
+{
+ DirtyBitmapMigBitmapState *dbms;
+
+ while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL)
{
+ QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
+ g_free(dbms);
+ }
+}
+
+/* for SaveVMHandlers */
+static void dirty_bitmap_migration_cleanup(void *opaque)
+{
+ dirty_bitmap_mig_cleanup();
+}
+
+static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque)
+{
+ trace_dirty_bitmap_save_iterate(
+ migration_in_postcopy(migrate_get_current()));
+
+ if (migration_in_postcopy(migrate_get_current()) &&
+ !dirty_bitmap_mig_state.bulk_completed) {
+ bulk_phase(f, true);
+ }
+
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ return dirty_bitmap_mig_state.bulk_completed;
+}
+
+/* Called with iothread lock taken. */
+
+static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
+{
+ DirtyBitmapMigBitmapState *dbms;
+ trace_dirty_bitmap_save_complete_enter();
+
+ if (!dirty_bitmap_mig_state.bulk_completed) {
+ bulk_phase(f, false);
+ }
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ send_bitmap_complete(f, dbms);
+ }
+
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ trace_dirty_bitmap_save_complete_finish();
+
+ dirty_bitmap_mig_cleanup();
+ return 0;
+}
+
+static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
+ uint64_t max_size,
+ uint64_t *res_precopy_only,
+ uint64_t *res_compatible,
+ uint64_t *res_postcopy_only)
+{
+ DirtyBitmapMigBitmapState *dbms;
+ uint64_t pending = 0;
+
+ qemu_mutex_lock_iothread();
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap);
+ uint64_t sectors = dbms->bulk_completed ? 0 :
+ dbms->total_sectors - dbms->cur_sector;
+
+ pending += (sectors * BDRV_SECTOR_SIZE + gran - 1) / gran;
+ }
+
+ qemu_mutex_unlock_iothread();
+
+ trace_dirty_bitmap_save_pending(pending, max_size);
+
+ *res_postcopy_only += pending;
+}
+
+/* First occurrence of this bitmap. It should be created if doesn't exist */
+static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ Error *local_err = NULL;
+ uint32_t granularity = qemu_get_be32(f);
+ bool enabled = qemu_get_byte(f);
+
+ if (!s->bitmap) {
+ s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity,
+ s->bitmap_name, &local_err);
+ if (!s->bitmap) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+ } else {
+ uint32_t dest_granularity =
+ bdrv_dirty_bitmap_granularity(s->bitmap);
+ if (dest_granularity != granularity) {
+ fprintf(stderr,
+ "Error: "
+ "Migrated bitmap granularity (%" PRIu32 ") "
+ "doesn't match the destination bitmap '%s' "
+ "granularity (%" PRIu32 ")\n",
+ granularity,
+ bdrv_dirty_bitmap_name(s->bitmap),
+ dest_granularity);
+ return -EINVAL;
+ }
+ }
+
+ bdrv_disable_dirty_bitmap(s->bitmap);
+ if (enabled) {
+ DirtyBitmapLoadBitmapState *b;
+
+ bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -EINVAL;
+ }
+
+ b = g_new(DirtyBitmapLoadBitmapState, 1);
+ b->bs = s->bs;
+ b->bitmap = s->bitmap;
+ b->migrated = false;
+ enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b);
+ }
+
+ return 0;
+}
+
+void dirty_bitmap_mig_before_vm_start(void)
+{
+ GSList *item;
+
+ qemu_mutex_lock(&finish_lock);
+
+ for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+ DirtyBitmapLoadBitmapState *b = item->data;
+
+ if (b->migrated) {
+ bdrv_enable_dirty_bitmap(b->bitmap);
+ } else {
+ bdrv_dirty_bitmap_enable_successor(b->bitmap);
+ }
+
+ g_free(b);
+ }
+
+ g_slist_free(enabled_bitmaps);
+ enabled_bitmaps = NULL;
+
+ qemu_mutex_unlock(&finish_lock);
+}
+
+static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ GSList *item;
+ trace_dirty_bitmap_load_complete();
+ bdrv_dirty_bitmap_deserialize_finish(s->bitmap);
+
+ qemu_mutex_lock(&finish_lock);
+
+ for (item = enabled_bitmaps; item; item = g_slist_next(item)) {
+ DirtyBitmapLoadBitmapState *b = item->data;
+
+ if (b->bitmap == s->bitmap) {
+ b->migrated = true;
+ }
+ }
+
+ if (bdrv_dirty_bitmap_frozen(s->bitmap)) {
+ if (enabled_bitmaps == NULL) {
+ /* in postcopy */
+ AioContext *aio_context = bdrv_get_aio_context(s->bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_reclaim_dirty_bitmap(s->bs, s->bitmap, &error_abort);
+ bdrv_enable_dirty_bitmap(s->bitmap);
+
+ aio_context_release(aio_context);
+ } else {
+ /* target not started, successor is empty */
+ bdrv_dirty_bitmap_release_successor(s->bs, s->bitmap);
+ }
+ }
+
+ qemu_mutex_unlock(&finish_lock);
+}
+
+static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ uint64_t first_sector = qemu_get_be64(f);
+ uint32_t nr_sectors = qemu_get_be32(f);
+ trace_dirty_bitmap_load_bits_enter(first_sector, nr_sectors);
+
+ if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+ trace_dirty_bitmap_load_bits_zeroes();
+ bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_sector,
+ nr_sectors, false);
+ } else {
+ uint8_t *buf;
+ uint64_t buf_size = qemu_get_be64(f);
+ uint64_t needed_size =
+ bdrv_dirty_bitmap_serialization_size(s->bitmap,
+ first_sector, nr_sectors);
+
+ if (needed_size > buf_size) {
+ fprintf(stderr,
+ "Error: Migrated bitmap granularity doesn't "
+ "match the destination bitmap '%s' granularity\n",
+ bdrv_dirty_bitmap_name(s->bitmap));
+ return -EINVAL;
+ }
+
+ buf = g_malloc(buf_size);
+ qemu_get_buffer(f, buf, buf_size);
+ bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf,
+ first_sector,
+ nr_sectors, false);
+ g_free(buf);
+ }
+
+ return 0;
+}
+
+static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s)
+{
+ Error *local_err = NULL;
+ s->flags = qemu_get_bitmap_flags(f);
+ trace_dirty_bitmap_load_header(s->flags);
+
+ if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) {
+ if (!qemu_get_counted_string(f, s->node_name)) {
+ fprintf(stderr, "Unable to read node name string\n");
+ return -EINVAL;
+ }
+ s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err);
+ if (!s->bs) {
+ error_report("%s", error_get_pretty(local_err));
+ error_free(local_err);
+ return -EINVAL;
+ }
+ } else if (!s->bs) {
+ fprintf(stderr, "Error: block device name is not set\n");
+ return -EINVAL;
+ }
+
+ if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) {
+ if (!qemu_get_counted_string(f, s->bitmap_name)) {
+ fprintf(stderr, "Unable to read node name string\n");
+ return -EINVAL;
+ }
+ s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name);
+
+ /* bitmap may be NULL here, it wouldn't be an error if it is the
+ * first occurrence of the bitmap */
+ if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) {
+ fprintf(stderr, "Error: unknown dirty bitmap "
+ "'%s' for block device '%s'\n",
+ s->bitmap_name, s->node_name);
+ return -EINVAL;
+ }
+ } else if (!s->bitmap) {
+ fprintf(stderr, "Error: block device name is not set\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id)
+{
+ static DirtyBitmapLoadState s;
+
+ int ret = 0;
+
+ trace_dirty_bitmap_load_enter();
+
+ do {
+ dirty_bitmap_load_header(f, &s);
+
+ if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) {
+ ret = dirty_bitmap_load_start(f, &s);
+ } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) {
+ dirty_bitmap_load_complete(f, &s);
+ } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) {
+ ret = dirty_bitmap_load_bits(f, &s);
+ }
+
+ if (!ret) {
+ ret = qemu_file_get_error(f);
+ }
+
+ if (ret) {
+ return ret;
+ }
+ } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS));
+
+ trace_dirty_bitmap_load_success();
+ return 0;
+}
+
+static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
+{
+ DirtyBitmapMigBitmapState *dbms = NULL;
+ init_dirty_bitmap_migration();
+
+ QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) {
+ send_bitmap_start(f, dbms);
+ }
+ qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS);
+
+ return 0;
+}
+
+static bool dirty_bitmap_is_active(void *opaque)
+{
+ return migrate_dirty_bitmaps();
+}
+
+static bool dirty_bitmap_is_active_iterate(void *opaque)
+{
+ return dirty_bitmap_is_active(opaque) && !runstate_is_running();
+}
+
+static bool dirty_bitmap_has_postcopy(void *opaque)
+{
+ return true;
+}
+
+static SaveVMHandlers savevm_dirty_bitmap_handlers = {
+ .save_live_setup = dirty_bitmap_save_setup,
+ .save_live_complete_postcopy = dirty_bitmap_save_complete,
+ .save_live_complete_precopy = dirty_bitmap_save_complete,
+ .has_postcopy = dirty_bitmap_has_postcopy,
+ .save_live_pending = dirty_bitmap_save_pending,
+ .save_live_iterate = dirty_bitmap_save_iterate,
+ .is_active_iterate = dirty_bitmap_is_active_iterate,
+ .load_state = dirty_bitmap_load,
+ .cleanup = dirty_bitmap_migration_cleanup,
+ .is_active = dirty_bitmap_is_active,
+};
+
+void dirty_bitmap_mig_init(void)
+{
+ QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list);
+
+ register_savevm_live(NULL, "dirty-bitmap", 0, 1,
+ &savevm_dirty_bitmap_handlers,
+ &dirty_bitmap_mig_state);
+}
diff --git a/migration/migration.c b/migration/migration.c
index 179bd04..edf5623 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -122,6 +122,9 @@ MigrationIncomingState *migration_incoming_get_current(void)
QLIST_INIT(&mis_current.loadvm_handlers);
qemu_mutex_init(&mis_current.rp_mutex);
qemu_event_init(&mis_current.main_thread_load_event, false);
+
+ init_dirty_bitmap_incoming_migration();
+
once = true;
}
return &mis_current;
diff --git a/migration/savevm.c b/migration/savevm.c
index 54572ef..927a680 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1651,6 +1651,8 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
trace_loadvm_postcopy_handle_run_vmstart();
+ dirty_bitmap_mig_before_vm_start();
+
if (autostart) {
/* Hold onto your hats, starting the CPU */
vm_start();
diff --git a/migration/trace-events b/migration/trace-events
index 0212929..38fca41 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -222,3 +222,17 @@ colo_vm_state_change(const char *old, const char *new) "Change
'%s' => '%s'"
colo_send_message(const char *msg) "Send '%s' message"
colo_receive_message(const char *msg) "Receive '%s' message"
colo_failover_set_state(const char *new_state) "new state %s"
+
+# migration/block-dirty-bitmap.c
+send_bitmap_header_enter(void) ""
+send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "\n flags:
%x\n start_sector: %" PRIu64 "\n nr_sectors: %" PRIu32 "\n data_size: %" PRIu64
"\n"
+dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
+dirty_bitmap_save_complete_enter(void) ""
+dirty_bitmap_save_complete_finish(void) ""
+dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 "
max: %" PRIu64
+dirty_bitmap_load_complete(void) ""
+dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %"
PRIu64 " %" PRIu32
+dirty_bitmap_load_bits_zeroes(void) ""
+dirty_bitmap_load_header(uint32_t flags) "flags %x"
+dirty_bitmap_load_enter(void) ""
+dirty_bitmap_load_success(void) ""
diff --git a/vl.c b/vl.c
index b4eaf03..f1ee9ff 100644
--- a/vl.c
+++ b/vl.c
@@ -4405,6 +4405,7 @@ int main(int argc, char **argv, char **envp)
blk_mig_init();
ram_mig_init();
+ dirty_bitmap_mig_init();
/* If the currently selected machine wishes to override the units-per-bus
* property of its default HBA interface type, do so now. */
--
1.8.3.1