* Cédric Le Goater (c...@kaod.org) wrote: > On the POWER9 processor, the XIVE interrupt controller can control > interrupt sources using MMIO to trigger events, to EOI or to turn off > the sources. Priority management and interrupt acknowledgment is also > controlled by MMIO in the presenter sub-engine. > > These MMIO regions are exposed to guests in QEMU with a set of 'ram > device' memory mappings, similarly to VFIO, and the VMAs are populated > dynamically with the appropriate pages using a fault handler. > > But, these regions are an issue for migration. We need to discard the > associated RAMBlocks from the RAM state on the source VM and let the > destination VM rebuild the memory mappings on the new host in the > post_load() operation just before resuming the system. > > To achieve this goal, the following introduces a new RAMBlock flag > RAM_MIGRATABLE which is updated in the vmstate_register_ram() and > vmstate_unregister_ram() routines. This flag is then used by the > migration to identify RAMBlocks to discard on the source. Some checks > are also performed on the destination to make sure nothing invalid was > sent. > > This change impacts the boston, malta and jazz mips boards for which > migration compatibility is broken. > > Signed-off-by: Cédric Le Goater <c...@kaod.org>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > --- > > Changes since v3: > > - removed a superfluous extra space > - fixed ramblock_recv_map_init() and ram_load_cleanup() to not alloc > (and free) the receivedmap bitmap for non-migratable RAMBlocks > - introduced a qemu_ram_foreach_migratable_block() helper for > postcopy support > > Changes since v2: > > - added an error_report() in ram_save_host_page() > - un/set the RAMBlock RAM_MIGRATABLE directly under vmstate_un/register_ram() > with some new flag helpers > > exec.c | 38 ++++++++++++++++++++++++++++++++++++++ > include/exec/cpu-common.h | 4 ++++ > migration/postcopy-ram.c | 12 ++++++------ > migration/ram.c | 46 ++++++++++++++++++++++++++++++++++------------ > migration/savevm.c | 2 ++ > 5 files changed, 84 insertions(+), 18 deletions(-) > > diff --git a/exec.c b/exec.c > index c7fcefa851b2..948747223869 100644 > --- a/exec.c > +++ b/exec.c > @@ -104,6 +104,9 @@ static MemoryRegion io_mem_unassigned; > * (Set during postcopy) > */ > #define RAM_UF_ZEROPAGE (1 << 3) > + > +/* RAM can be migrated */ > +#define RAM_MIGRATABLE (1 << 4) > #endif > > #ifdef TARGET_PAGE_BITS_VARY > @@ -1797,6 +1800,21 @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb) > rb->flags |= RAM_UF_ZEROPAGE; > } > > +bool qemu_ram_is_migratable(RAMBlock *rb) > +{ > + return rb->flags & RAM_MIGRATABLE; > +} > + > +void qemu_ram_set_migratable(RAMBlock *rb) > +{ > + rb->flags |= RAM_MIGRATABLE; > +} > + > +void qemu_ram_unset_migratable(RAMBlock *rb) > +{ > + rb->flags &= ~RAM_MIGRATABLE; > +} > + > /* Called with iothread lock held. */ > void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState > *dev) > { > @@ -3740,6 +3758,26 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void > *opaque) > return ret; > } > > +int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque) > +{ > + RAMBlock *block; > + int ret = 0; > + > + rcu_read_lock(); > + RAMBLOCK_FOREACH(block) { > + if (!qemu_ram_is_migratable(block)) { > + continue; > + } > + ret = func(block->idstr, block->host, block->offset, > + block->used_length, opaque); > + if (ret) { > + break; > + } > + } > + rcu_read_unlock(); > + return ret; > +} > + > /* > * Unmap pages of memory from start to start+length such that > * they a) read as 0, b) Trigger whatever fault mechanism > diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h > index 24d335f95d45..0b58e262f301 100644 > --- a/include/exec/cpu-common.h > +++ b/include/exec/cpu-common.h > @@ -75,6 +75,9 @@ const char *qemu_ram_get_idstr(RAMBlock *rb); > bool qemu_ram_is_shared(RAMBlock *rb); > bool qemu_ram_is_uf_zeroable(RAMBlock *rb); > void qemu_ram_set_uf_zeroable(RAMBlock *rb); > +bool qemu_ram_is_migratable(RAMBlock *rb); > +void qemu_ram_set_migratable(RAMBlock *rb); > +void qemu_ram_unset_migratable(RAMBlock *rb); > > size_t qemu_ram_pagesize(RAMBlock *block); > size_t qemu_ram_pagesize_largest(void); > @@ -119,6 +122,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, > void *host_addr, > ram_addr_t offset, ram_addr_t length, void *opaque); > > int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); > +int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque); > int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); > > #endif > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > index 8ceeaa2a93f6..bec405bc39cc 100644 > --- a/migration/postcopy-ram.c > +++ b/migration/postcopy-ram.c > @@ -374,7 +374,7 @@ bool > postcopy_ram_supported_by_host(MigrationIncomingState *mis) > } > > /* We don't support postcopy with shared RAM yet */ > - if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) { > + if (qemu_ram_foreach_migratable_block(test_ramblock_postcopiable, NULL)) > { > goto out; > } > > @@ -502,7 +502,7 @@ static int cleanup_range(const char *block_name, void > *host_addr, > */ > int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) > { > - if (qemu_ram_foreach_block(init_range, NULL)) { > + if (qemu_ram_foreach_migratable_block(init_range, NULL)) { > return -1; > } > > @@ -524,7 +524,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState > *mis) > return -1; > } > > - if (qemu_ram_foreach_block(cleanup_range, mis)) { > + if (qemu_ram_foreach_migratable_block(cleanup_range, mis)) { > return -1; > } > /* Let the fault thread quit */ > @@ -593,7 +593,7 @@ static int nhp_range(const char *block_name, void > *host_addr, > */ > int postcopy_ram_prepare_discard(MigrationIncomingState *mis) > { > - if (qemu_ram_foreach_block(nhp_range, mis)) { > + if (qemu_ram_foreach_migratable_block(nhp_range, mis)) { > return -1; > } > > @@ -604,7 +604,7 @@ int postcopy_ram_prepare_discard(MigrationIncomingState > *mis) > > /* > * Mark the given area of RAM as requiring notification to unwritten areas > - * Used as a callback on qemu_ram_foreach_block. > + * Used as a callback on qemu_ram_foreach_migratable_block. > * host_addr: Base of area to mark > * offset: Offset in the whole ram arena > * length: Length of the section > @@ -1053,7 +1053,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState > *mis) > mis->have_fault_thread = true; > > /* Mark so that we get notified of accesses to unwritten areas */ > - if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) { > + if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) { > return -1; > } > > diff --git a/migration/ram.c b/migration/ram.c > index 912810c18e0f..102bfe5c571a 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -153,11 +153,16 @@ out: > return ret; > } > > +/* Should be holding either ram_list.mutex, or the RCU lock. */ > +#define RAMBLOCK_FOREACH_MIGRATABLE(block) \ > + RAMBLOCK_FOREACH(block) \ > + if (!qemu_ram_is_migratable(block)) {} else > + > static void ramblock_recv_map_init(void) > { > RAMBlock *rb; > > - RAMBLOCK_FOREACH(rb) { > + RAMBLOCK_FOREACH_MIGRATABLE(rb) { > assert(!rb->receivedmap); > rb->receivedmap = bitmap_new(rb->max_length >> > qemu_target_page_bits()); > } > @@ -813,6 +818,10 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, > RAMBlock *rb, > unsigned long *bitmap = rb->bmap; > unsigned long next; > > + if (!qemu_ram_is_migratable(rb)) { > + return size; > + } > + > if (rs->ram_bulk_stage && start > 0) { > next = start + 1; > } else { > @@ -858,7 +867,7 @@ uint64_t ram_pagesize_summary(void) > RAMBlock *block; > uint64_t summary = 0; > > - RAMBLOCK_FOREACH(block) { > + RAMBLOCK_FOREACH_MIGRATABLE(block) { > summary |= block->page_size; > } > > @@ -882,7 +891,7 @@ static void migration_bitmap_sync(RAMState *rs) > > qemu_mutex_lock(&rs->bitmap_mutex); > rcu_read_lock(); > - RAMBLOCK_FOREACH(block) { > + RAMBLOCK_FOREACH_MIGRATABLE(block) { > migration_bitmap_sync_range(rs, block, 0, block->used_length); > } > rcu_read_unlock(); > @@ -1521,6 +1530,11 @@ static int ram_save_host_page(RAMState *rs, > PageSearchStatus *pss, > size_t pagesize_bits = > qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; > > + if (!qemu_ram_is_migratable(pss->block)) { > + error_report("block %s should not be migrated !", pss->block->idstr); > + return 0; > + } > + > do { > /* Check the pages is dirty and if it is send it */ > if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { > @@ -1619,7 +1633,7 @@ uint64_t ram_bytes_total(void) > uint64_t total = 0; > > rcu_read_lock(); > - RAMBLOCK_FOREACH(block) { > + RAMBLOCK_FOREACH_MIGRATABLE(block) { > total += block->used_length; > } > rcu_read_unlock(); > @@ -1674,7 +1688,7 @@ static void ram_save_cleanup(void *opaque) > */ > memory_global_dirty_log_stop(); > > - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { > + RAMBLOCK_FOREACH_MIGRATABLE(block) { > g_free(block->bmap); > block->bmap = NULL; > g_free(block->unsentmap); > @@ -1737,7 +1751,7 @@ void > ram_postcopy_migrated_memory_release(MigrationState *ms) > { > struct RAMBlock *block; > > - RAMBLOCK_FOREACH(block) { > + RAMBLOCK_FOREACH_MIGRATABLE(block) { > unsigned long *bitmap = block->bmap; > unsigned long range = block->used_length >> TARGET_PAGE_BITS; > unsigned long run_start = find_next_zero_bit(bitmap, range, 0); > @@ -1815,7 +1829,7 @@ static int > postcopy_each_ram_send_discard(MigrationState *ms) > struct RAMBlock *block; > int ret; > > - RAMBLOCK_FOREACH(block) { > + RAMBLOCK_FOREACH_MIGRATABLE(block) { > PostcopyDiscardState *pds = > postcopy_discard_send_init(ms, block->idstr); > > @@ -2023,7 +2037,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) > rs->last_sent_block = NULL; > rs->last_page = 0; > > - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { > + RAMBLOCK_FOREACH_MIGRATABLE(block) { > unsigned long pages = block->used_length >> TARGET_PAGE_BITS; > unsigned long *bitmap = block->bmap; > unsigned long *unsentmap = block->unsentmap; > @@ -2182,7 +2196,7 @@ static void ram_list_init_bitmaps(void) > > /* Skip setting bitmap if there is no RAM */ > if (ram_bytes_total()) { > - QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { > + RAMBLOCK_FOREACH_MIGRATABLE(block) { > pages = block->max_length >> TARGET_PAGE_BITS; > block->bmap = bitmap_new(pages); > bitmap_set(block->bmap, 0, pages); > @@ -2263,7 +2277,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) > > qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); > > - RAMBLOCK_FOREACH(block) { > + RAMBLOCK_FOREACH_MIGRATABLE(block) { > qemu_put_byte(f, strlen(block->idstr)); > qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); > qemu_put_be64(f, block->used_length); > @@ -2507,6 +2521,11 @@ static inline RAMBlock *ram_block_from_stream(QEMUFile > *f, int flags) > return NULL; > } > > + if (!qemu_ram_is_migratable(block)) { > + error_report("block %s should not be migrated !", id); > + return NULL; > + } > + > return block; > } > > @@ -2749,7 +2768,7 @@ static int ram_load_cleanup(void *opaque) > xbzrle_load_cleanup(); > compress_threads_load_cleanup(); > > - RAMBLOCK_FOREACH(rb) { > + RAMBLOCK_FOREACH_MIGRATABLE(rb) { > g_free(rb->receivedmap); > rb->receivedmap = NULL; > } > @@ -3011,7 +3030,10 @@ static int ram_load(QEMUFile *f, void *opaque, int > version_id) > length = qemu_get_be64(f); > > block = qemu_ram_block_by_name(id); > - if (block) { > + if (block && !qemu_ram_is_migratable(block)) { > + error_report("block %s should not be migrated !", id); > + ret = -EINVAL; > + } else if (block) { > if (length != block->used_length) { > Error *local_err = NULL; > > diff --git a/migration/savevm.c b/migration/savevm.c > index e2be02afe42c..9ebfba738ea4 100644 > --- a/migration/savevm.c > +++ b/migration/savevm.c > @@ -2501,11 +2501,13 @@ void vmstate_register_ram(MemoryRegion *mr, > DeviceState *dev) > { > qemu_ram_set_idstr(mr->ram_block, > memory_region_name(mr), dev); > + qemu_ram_set_migratable(mr->ram_block); > } > > void vmstate_unregister_ram(MemoryRegion *mr, DeviceState *dev) > { > qemu_ram_unset_idstr(mr->ram_block); > + qemu_ram_unset_migratable(mr->ram_block); > } > > void vmstate_register_ram_global(MemoryRegion *mr) > -- > 2.13.6 > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK