On Fri, Oct 03, 2014 at 06:47:35PM +0100, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilb...@redhat.com>
> 
> The PMI holds the state of each page on the incoming side,
> so that we can tell if the page is missing, already received
> or there is a request outstanding for it.
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
Reviewed-by: David Gibson <da...@gibson.dropbear.id.au>

Though there are a couple of minor comments below:

> ---
>  include/migration/migration.h    |  19 ++++
>  include/migration/postcopy-ram.h |  12 +++
>  include/qemu/typedefs.h          |   1 +
>  postcopy-ram.c                   | 220 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 252 insertions(+)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index 2ff9d35..1405a15 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -57,6 +57,24 @@ struct MigrationRetPathState {
>  
>  typedef struct MigrationState MigrationState;
>  
> +/* Postcopy page-map-incoming - data about each page on the inbound side */
> +
> +typedef enum {
> +    POSTCOPY_PMI_MISSING,   /* page hasn't yet been received */
> +    POSTCOPY_PMI_REQUESTED, /* Kernel asked for a page, but we've not got it */
> +    POSTCOPY_PMI_RECEIVED   /* We've got the page */
> +} PostcopyPMIState;
> +
> +struct PostcopyPMI {
> +    QemuMutex      mutex;
> +    unsigned long *received_map;  /* Pages that we have received */
> +    unsigned long *requested_map; /* Pages that we're sending a request for */
> +    unsigned long  host_mask;     /* A mask with enough bits set to cover one
> +                                     host page in the PMI */
> +    unsigned long  host_bits;     /* The number of bits in the map representing
> +                                     one host page */
> +};
> +
>  /* State for the incoming migration */
>  struct MigrationIncomingState {
>      QEMUFile *file;
> @@ -71,6 +89,7 @@ struct MigrationIncomingState {
>  
>      QEMUFile *return_path;
>      QemuMutex      rp_mutex;    /* We send replies from multiple threads */
> +    PostcopyPMI    postcopy_pmi;
> };
>  
>  MigrationIncomingState *migration_incoming_get_current(void);
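
The host_mask / host_bits pair took me a moment to work out, so just to check
my understanding (and it might be worth an example in the comment): with, say,
a 64k host page and 4k target pages, each host page owns 16 consecutive bits
in each bitmap, i.e. roughly (illustrative numbers only):

    host_bits = 64 * 1024 / (4 * 1024);  /* 16 bits per host page in the map */
    host_mask = (1ul << host_bits) - 1;  /* 0xffff, i.e. bits 0..15 set */

which I think matches what postcopy_pmi_init() computes later from
sysconf(_SC_PAGESIZE) and qemu_target_page_bits().
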
> diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
> index dcd1afa..addb88a 100644
> --- a/include/migration/postcopy-ram.h
> +++ b/include/migration/postcopy-ram.h
> @@ -13,7 +13,19 @@
>  #ifndef QEMU_POSTCOPY_RAM_H
>  #define QEMU_POSTCOPY_RAM_H
>  
> +#include "migration/migration.h"
> +
>  /* Return 0 if the host supports everything we need to do postcopy-ram */
>  int postcopy_ram_hosttest(void);
>  
> +/*
> + * In 'advise' mode record that a page has been received.
> + */
> +void postcopy_hook_early_receive(MigrationIncomingState *mis,
> +                                 size_t bitmap_index);
> +
> +void postcopy_pmi_destroy(MigrationIncomingState *mis);
> +void postcopy_pmi_discard_range(MigrationIncomingState *mis,
> +                                size_t start, size_t npages);
> +void postcopy_pmi_dump(MigrationIncomingState *mis);
>  #endif
> diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
> index 8539de6..61b330c 100644
> --- a/include/qemu/typedefs.h
> +++ b/include/qemu/typedefs.h
> @@ -77,6 +77,7 @@ typedef struct QEMUSGList QEMUSGList;
>  typedef struct SHPCDevice SHPCDevice;
>  typedef struct FWCfgState FWCfgState;
>  typedef struct PcGuestInfo PcGuestInfo;
> +typedef struct PostcopyPMI PostcopyPMI;
>  typedef struct Range Range;
>  typedef struct AdapterInfo AdapterInfo;
>  
> diff --git a/postcopy-ram.c b/postcopy-ram.c
> index bba5c71..210585c 100644
> --- a/postcopy-ram.c
> +++ b/postcopy-ram.c
> @@ -23,6 +23,9 @@
>  #include "qemu-common.h"
>  #include "migration/migration.h"
>  #include "migration/postcopy-ram.h"
> +#include "sysemu/sysemu.h"
> +#include "qemu/bitmap.h"
> +#include "qemu/error-report.h"
>  
>  //#define DEBUG_POSTCOPY
>  
> @@ -82,6 +85,216 @@
>  #if defined(__linux__) && defined(MADV_USERFAULT) && \
>      defined(__NR_remap_anon_pages)
>  
> +/* ---------------------------------------------------------------------- */
> +/* Postcopy pagemap-inbound (pmi) - data structures that record the       */
> +/* state of each page used by the inbound postcopy                        */
> +/* It's a pair of bitmaps (of the same structure as the migration bitmaps)*/
> +/* holding one bit per target-page, although all operations work on host  */
> +/* pages.                                                                 */
> +__attribute__ (( unused )) /* Until later in patch series */
> +static void postcopy_pmi_init(MigrationIncomingState *mis, size_t ram_pages)
> +{
> +    unsigned int tpb = qemu_target_page_bits();
> +    unsigned long host_bits;
> +
> +    qemu_mutex_init(&mis->postcopy_pmi.mutex);
> +    mis->postcopy_pmi.received_map = bitmap_new(ram_pages);
> +    mis->postcopy_pmi.requested_map = bitmap_new(ram_pages);
> +    bitmap_clear(mis->postcopy_pmi.received_map, 0, ram_pages);
> +    bitmap_clear(mis->postcopy_pmi.requested_map, 0, ram_pages);
> +    /*
> +     * Each bit in the map represents one 'target page' which is no bigger
> +     * than a host page but can be smaller. It's useful to have some
> +     * convenience masks for later

So, there's no inherent reason a target page couldn't be bigger than a
host page. It's fair enough not to handle that case for now, but
something somewhere should probably verify that it's not the case.

> +     */
> +
> +    /*
> +     * The number of bits one host page takes up in the bitmap
> +     * e.g. on a 64k host page, 4k Target page, host_bits=64/4=16
> +     */
> +    host_bits = sysconf(_SC_PAGESIZE) / (1ul << tpb);
> +    /* Should be a power of 2 */
> +    assert(host_bits && !(host_bits & (host_bits - 1)));
> +    /*
> +     * If the host_bits isn't a division of the number of bits in long
> +     * then the code gets a lot more complex; disallow for now
> +     * (I'm not aware of a system where it's true anyway)
> +     */
> +    assert(((sizeof(long) * 8) % host_bits) == 0);
> +
> +    mis->postcopy_pmi.host_bits = host_bits;
> +    /* A mask, starting at bit 0, containing host_bits continuous set bits */
> +    mis->postcopy_pmi.host_mask = (1ul << host_bits) - 1;
> +
> +    assert((ram_pages % host_bits) == 0);
> +}
> +
> +void postcopy_pmi_destroy(MigrationIncomingState *mis)
> +{
> +    if (mis->postcopy_pmi.received_map) {
> +        g_free(mis->postcopy_pmi.received_map);

g_free() is safe to call on NULL anyway, isn't it?
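
i.e. unless I'm missing a reason for the NULL checks, I think the whole
function could shrink to something like (untested sketch):

    void postcopy_pmi_destroy(MigrationIncomingState *mis)
    {
        /* g_free(NULL) is a no-op, so no need to guard the frees */
        g_free(mis->postcopy_pmi.received_map);
        mis->postcopy_pmi.received_map = NULL;
        g_free(mis->postcopy_pmi.requested_map);
        mis->postcopy_pmi.requested_map = NULL;
        qemu_mutex_destroy(&mis->postcopy_pmi.mutex);
    }

Not a big deal either way, though.
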
> +        mis->postcopy_pmi.received_map = NULL;
> +    }
> +    if (mis->postcopy_pmi.requested_map) {
> +        g_free(mis->postcopy_pmi.requested_map);
> +        mis->postcopy_pmi.requested_map = NULL;
> +    }
> +    qemu_mutex_destroy(&mis->postcopy_pmi.mutex);
> +}
> +
> +/*
> + * Mark a set of pages in the PMI as being clear; this is used by the discard
> + * at the start of postcopy, and before the postcopy stream starts.
> + */
> +void postcopy_pmi_discard_range(MigrationIncomingState *mis,
> +                                size_t start, size_t npages)
> +{
> +    bitmap_clear(mis->postcopy_pmi.received_map, start, npages);
> +}
> +
> +/*
> + * Test a host-page worth of bits in the map starting at bitmap_index
> + * The bits should all be consistent
> + */
> +static bool test_hpbits(MigrationIncomingState *mis,
> +                        size_t bitmap_index, unsigned long *map)
> +{
> +    long masked;
> +
> +    assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0);
> +
> +    masked = (map[BIT_WORD(bitmap_index)] >>
> +              (bitmap_index % BITS_PER_LONG)) &
> +             mis->postcopy_pmi.host_mask;
> +
> +    assert((masked == 0) || (masked == mis->postcopy_pmi.host_mask));
> +    return !!masked;
> +}
> +
> +/*
> + * Set host-page worth of bits in the map starting at bitmap_index
> + */
> +static void set_hpbits(MigrationIncomingState *mis,
> +                       size_t bitmap_index, unsigned long *map)
> +{
> +    assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0);
> +
> +    map[BIT_WORD(bitmap_index)] |= mis->postcopy_pmi.host_mask <<
> +                                   (bitmap_index % BITS_PER_LONG);
> +}
> +
> +/*
> + * Clear host-page worth of bits in the map starting at bitmap_index
> + */
> +static void clear_hpbits(MigrationIncomingState *mis,
> +                         size_t bitmap_index, unsigned long *map)
> +{
> +    assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0);
> +
> +    map[BIT_WORD(bitmap_index)] &= ~(mis->postcopy_pmi.host_mask <<
> +                                     (bitmap_index % BITS_PER_LONG));
> +}
> +
> +/*
> + * Retrieve the state of the given page
> + * Note: This version for use by callers already holding the lock
> + */
> +static PostcopyPMIState postcopy_pmi_get_state_nolock(
> +                            MigrationIncomingState *mis,
> +                            size_t bitmap_index)
> +{
> +    bool received, requested;
> +
> +    received = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.received_map);
> +    requested = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.requested_map);
> +
> +    if (received) {
> +        assert(!requested);

Clearing the requested bit when you set the received bit seems a bit
pointless. (requested && received) isn't meaningfully different from
(!requested && received) but there seems no reason to go to extra trouble
to avoid that state, and having the record might be interesting for
gathering statistics.

> +        return POSTCOPY_PMI_RECEIVED;
> +    } else {
> +        return requested ? POSTCOPY_PMI_REQUESTED : POSTCOPY_PMI_MISSING;
> +    }
> +}
> +
> +/* Retrieve the state of the given page */
> +__attribute__ (( unused )) /* Until later in patch series */
> +static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis,
> +                                               size_t bitmap_index)
> +{
> +    PostcopyPMIState ret;
> +    qemu_mutex_lock(&mis->postcopy_pmi.mutex);
> +    ret = postcopy_pmi_get_state_nolock(mis, bitmap_index);
> +    qemu_mutex_unlock(&mis->postcopy_pmi.mutex);
> +
> +    return ret;
> +}
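
Just to make sure I'm reading the encoding right, the two bitmaps combine
per host page as:

    requested  received      state
        0          0         POSTCOPY_PMI_MISSING
        1          0         POSTCOPY_PMI_REQUESTED
        0          1         POSTCOPY_PMI_RECEIVED
        1          1         asserted away at present

which is also why I don't think the (requested && received) combination
would be ambiguous if you simply allowed it, as per my comment above.
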
> +
> +/*
> + * Set the page state to the given state if the previous state was as expected
> + * Return the actual previous state.
> + */
> +__attribute__ (( unused )) /* Until later in patch series */
> +static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState *mis,
> +                                           size_t bitmap_index,
> +                                           PostcopyPMIState expected_state,
> +                                           PostcopyPMIState new_state)
> +{
> +    PostcopyPMIState old_state;
> +
> +    qemu_mutex_lock(&mis->postcopy_pmi.mutex);
> +    old_state = postcopy_pmi_get_state_nolock(mis, bitmap_index);
> +
> +    if (old_state == expected_state) {
> +        switch (new_state) {
> +        case POSTCOPY_PMI_MISSING:
> +            assert(0); /* This shouldn't actually happen - use discard_range */
> +            break;
> +
> +        case POSTCOPY_PMI_REQUESTED:
> +            assert(old_state == POSTCOPY_PMI_MISSING);
> +            set_hpbits(mis, bitmap_index, mis->postcopy_pmi.requested_map);
> +            break;
> +
> +        case POSTCOPY_PMI_RECEIVED:
> +            assert(old_state == POSTCOPY_PMI_MISSING ||
> +                   old_state == POSTCOPY_PMI_REQUESTED);
> +            set_hpbits(mis, bitmap_index, mis->postcopy_pmi.received_map);
> +            clear_hpbits(mis, bitmap_index, mis->postcopy_pmi.requested_map);
> +            break;
> +        }
> +    }
> +
> +    qemu_mutex_unlock(&mis->postcopy_pmi.mutex);
> +    return old_state;
> +}
> +
> +/*
> + * Useful when debugging postcopy, although if it failed early the
> + * received map can be quite sparse and thus big when dumped.
> + */
> +void postcopy_pmi_dump(MigrationIncomingState *mis)
> +{
> +    fprintf(stderr, "postcopy_pmi_dump: requested\n");
> +    ram_debug_dump_bitmap(mis->postcopy_pmi.requested_map, false);
> +    fprintf(stderr, "postcopy_pmi_dump: received\n");
> +    ram_debug_dump_bitmap(mis->postcopy_pmi.received_map, true);
> +    fprintf(stderr, "postcopy_pmi_dump: end\n");
> +}
> +
> +/* Called by ram_load prior to mapping the page */
> +void postcopy_hook_early_receive(MigrationIncomingState *mis,
> +                                 size_t bitmap_index)
> +{
> +    if (mis->postcopy_ram_state == POSTCOPY_RAM_INCOMING_ADVISE) {

A silent no-op if you're not in the expected migration phase doesn't seem
right. Should this be an assert() instead?

> +        /*
> +         * If we're in precopy-advise mode we need to track received pages even
> +         * though we don't need to place pages atomically yet.
> +         * In advise mode there's only a single thread, so don't need locks
> +         */
> +        set_bit(bitmap_index, mis->postcopy_pmi.received_map);
> +    }
> +}
> +
>  int postcopy_ram_hosttest(void)
>  {
>      /* TODO: Needs guarding with CONFIG_ once we have libc's that have the defs
> @@ -156,5 +369,12 @@ int postcopy_ram_hosttest(void)
>      return -1;
>  }
>  
> +/* Called by ram_load prior to mapping the page */
> +void postcopy_hook_early_receive(MigrationIncomingState *mis,
> +                                 size_t bitmap_index)
> +{
> +    /* We don't support postcopy so don't care */
> +}
> +
>  #endif

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you. NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson