From: "Dr. David Alan Gilbert" <dgilb...@redhat.com> The PMI (page-map-incoming) holds the state of each page on the incoming side, so that we can tell whether a page is missing, has already been received, or has a request outstanding for it.
Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com> --- include/migration/migration.h | 19 ++++ include/migration/postcopy-ram.h | 12 +++ include/qemu/typedefs.h | 1 + postcopy-ram.c | 220 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 252 insertions(+) diff --git a/include/migration/migration.h b/include/migration/migration.h index 2ff9d35..1405a15 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -57,6 +57,24 @@ struct MigrationRetPathState { typedef struct MigrationState MigrationState; +/* Postcopy page-map-incoming - data about each page on the inbound side */ + +typedef enum { + POSTCOPY_PMI_MISSING, /* page hasn't yet been received */ + POSTCOPY_PMI_REQUESTED, /* Kernel asked for a page, but we've not got it */ + POSTCOPY_PMI_RECEIVED /* We've got the page */ +} PostcopyPMIState; + +struct PostcopyPMI { + QemuMutex mutex; + unsigned long *received_map; /* Pages that we have received */ + unsigned long *requested_map; /* Pages that we're sending a request for */ + unsigned long host_mask; /* A mask with enough bits set to cover one + host page in the PMI */ + unsigned long host_bits; /* The number of bits in the map representing + one host page */ +}; + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *file; @@ -71,6 +89,7 @@ struct MigrationIncomingState { QEMUFile *return_path; QemuMutex rp_mutex; /* We send replies from multiple threads */ + PostcopyPMI postcopy_pmi; }; MigrationIncomingState *migration_incoming_get_current(void); diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h index dcd1afa..addb88a 100644 --- a/include/migration/postcopy-ram.h +++ b/include/migration/postcopy-ram.h @@ -13,7 +13,19 @@ #ifndef QEMU_POSTCOPY_RAM_H #define QEMU_POSTCOPY_RAM_H +#include "migration/migration.h" + /* Return 0 if the host supports everything we need to do postcopy-ram */ int postcopy_ram_hosttest(void); +/* + * In 'advise' mode record that a page has 
been received. + */ +void postcopy_hook_early_receive(MigrationIncomingState *mis, + size_t bitmap_index); + +void postcopy_pmi_destroy(MigrationIncomingState *mis); +void postcopy_pmi_discard_range(MigrationIncomingState *mis, + size_t start, size_t npages); +void postcopy_pmi_dump(MigrationIncomingState *mis); #endif diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 8539de6..61b330c 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -77,6 +77,7 @@ typedef struct QEMUSGList QEMUSGList; typedef struct SHPCDevice SHPCDevice; typedef struct FWCfgState FWCfgState; typedef struct PcGuestInfo PcGuestInfo; +typedef struct PostcopyPMI PostcopyPMI; typedef struct Range Range; typedef struct AdapterInfo AdapterInfo; diff --git a/postcopy-ram.c b/postcopy-ram.c index bba5c71..210585c 100644 --- a/postcopy-ram.c +++ b/postcopy-ram.c @@ -23,6 +23,9 @@ #include "qemu-common.h" #include "migration/migration.h" #include "migration/postcopy-ram.h" +#include "sysemu/sysemu.h" +#include "qemu/bitmap.h" +#include "qemu/error-report.h" //#define DEBUG_POSTCOPY @@ -82,6 +85,216 @@ #if defined(__linux__) && defined(MADV_USERFAULT) && \ defined(__NR_remap_anon_pages) +/* ---------------------------------------------------------------------- */ +/* Postcopy pagemap-inbound (pmi) - data structures that record the */ +/* state of each page used by the inbound postcopy */ +/* It's a pair of bitmaps (of the same structure as the migration bitmaps)*/ +/* holding one bit per target-page, although all operations work on host */ +/* pages. 
*/ +__attribute__ (( unused )) /* Until later in patch series */ +static void postcopy_pmi_init(MigrationIncomingState *mis, size_t ram_pages) +{ + unsigned int tpb = qemu_target_page_bits(); + unsigned long host_bits; + + qemu_mutex_init(&mis->postcopy_pmi.mutex); + mis->postcopy_pmi.received_map = bitmap_new(ram_pages); + mis->postcopy_pmi.requested_map = bitmap_new(ram_pages); + bitmap_clear(mis->postcopy_pmi.received_map, 0, ram_pages); + bitmap_clear(mis->postcopy_pmi.requested_map, 0, ram_pages); + /* + * Each bit in the map represents one 'target page' which is no bigger + * than a host page but can be smaller. It's useful to have some + * convenience masks for later + */ + + /* + * The number of bits one host page takes up in the bitmap + * e.g. on a 64k host page, 4k Target page, host_bits=64/4=16 + */ + host_bits = sysconf(_SC_PAGESIZE) / (1ul << tpb); + /* Should be a power of 2 */ + assert(host_bits && !(host_bits & (host_bits - 1))); + /* + * If the host_bits isn't a division of the number of bits in long + * then the code gets a lot more complex; disallow for now + * (I'm not aware of a system where it's true anyway) + */ + assert(((sizeof(long) * 8) % host_bits) == 0); + + mis->postcopy_pmi.host_bits = host_bits; + /* A mask, starting at bit 0, containing host_bits continuous set bits */ + mis->postcopy_pmi.host_mask = (1ul << host_bits) - 1; + + assert((ram_pages % host_bits) == 0); +} + +void postcopy_pmi_destroy(MigrationIncomingState *mis) +{ + if (mis->postcopy_pmi.received_map) { + g_free(mis->postcopy_pmi.received_map); + mis->postcopy_pmi.received_map = NULL; + } + if (mis->postcopy_pmi.requested_map) { + g_free(mis->postcopy_pmi.requested_map); + mis->postcopy_pmi.requested_map = NULL; + } + qemu_mutex_destroy(&mis->postcopy_pmi.mutex); +} + +/* + * Mark a set of pages in the PMI as being clear; this is used by the discard + * at the start of postcopy, and before the postcopy stream starts. 
+ */ +void postcopy_pmi_discard_range(MigrationIncomingState *mis, + size_t start, size_t npages) +{ + bitmap_clear(mis->postcopy_pmi.received_map, start, npages); +} + +/* + * Test a host-page worth of bits in the map starting at bitmap_index + * The bits should all be consistent + */ +static bool test_hpbits(MigrationIncomingState *mis, + size_t bitmap_index, unsigned long *map) +{ + long masked; + + assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0); + + masked = (map[BIT_WORD(bitmap_index)] >> + (bitmap_index % BITS_PER_LONG)) & + mis->postcopy_pmi.host_mask; + + assert((masked == 0) || (masked == mis->postcopy_pmi.host_mask)); + return !!masked; +} + +/* + * Set host-page worth of bits in the map starting at bitmap_index + */ +static void set_hpbits(MigrationIncomingState *mis, + size_t bitmap_index, unsigned long *map) +{ + assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0); + + map[BIT_WORD(bitmap_index)] |= mis->postcopy_pmi.host_mask << + (bitmap_index % BITS_PER_LONG); +} + +/* + * Clear host-page worth of bits in the map starting at bitmap_index + */ +static void clear_hpbits(MigrationIncomingState *mis, + size_t bitmap_index, unsigned long *map) +{ + assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0); + + map[BIT_WORD(bitmap_index)] &= ~(mis->postcopy_pmi.host_mask << + (bitmap_index % BITS_PER_LONG)); +} + +/* + * Retrieve the state of the given page + * Note: This version for use by callers already holding the lock + */ +static PostcopyPMIState postcopy_pmi_get_state_nolock( + MigrationIncomingState *mis, + size_t bitmap_index) +{ + bool received, requested; + + received = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.received_map); + requested = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.requested_map); + + if (received) { + assert(!requested); + return POSTCOPY_PMI_RECEIVED; + } else { + return requested ? 
POSTCOPY_PMI_REQUESTED : POSTCOPY_PMI_MISSING; + } +} + +/* Retrieve the state of the given page */ +__attribute__ (( unused )) /* Until later in patch series */ +static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis, + size_t bitmap_index) +{ + PostcopyPMIState ret; + qemu_mutex_lock(&mis->postcopy_pmi.mutex); + ret = postcopy_pmi_get_state_nolock(mis, bitmap_index); + qemu_mutex_unlock(&mis->postcopy_pmi.mutex); + + return ret; +} + +/* + * Set the page state to the given state if the previous state was as expected + * Return the actual previous state. + */ +__attribute__ (( unused )) /* Until later in patch series */ +static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState *mis, + size_t bitmap_index, + PostcopyPMIState expected_state, + PostcopyPMIState new_state) +{ + PostcopyPMIState old_state; + + qemu_mutex_lock(&mis->postcopy_pmi.mutex); + old_state = postcopy_pmi_get_state_nolock(mis, bitmap_index); + + if (old_state == expected_state) { + switch (new_state) { + case POSTCOPY_PMI_MISSING: + assert(0); /* This shouldn't actually happen - use discard_range */ + break; + + case POSTCOPY_PMI_REQUESTED: + assert(old_state == POSTCOPY_PMI_MISSING); + set_hpbits(mis, bitmap_index, mis->postcopy_pmi.requested_map); + break; + + case POSTCOPY_PMI_RECEIVED: + assert(old_state == POSTCOPY_PMI_MISSING || + old_state == POSTCOPY_PMI_REQUESTED); + set_hpbits(mis, bitmap_index, mis->postcopy_pmi.received_map); + clear_hpbits(mis, bitmap_index, mis->postcopy_pmi.requested_map); + break; + } + } + + qemu_mutex_unlock(&mis->postcopy_pmi.mutex); + return old_state; +} + +/* + * Useful when debugging postcopy, although if it failed early the + * received map can be quite sparse and thus big when dumped. 
+ */ +void postcopy_pmi_dump(MigrationIncomingState *mis) +{ + fprintf(stderr, "postcopy_pmi_dump: requested\n"); + ram_debug_dump_bitmap(mis->postcopy_pmi.requested_map, false); + fprintf(stderr, "postcopy_pmi_dump: received\n"); + ram_debug_dump_bitmap(mis->postcopy_pmi.received_map, true); + fprintf(stderr, "postcopy_pmi_dump: end\n"); +} + +/* Called by ram_load prior to mapping the page */ +void postcopy_hook_early_receive(MigrationIncomingState *mis, + size_t bitmap_index) +{ + if (mis->postcopy_ram_state == POSTCOPY_RAM_INCOMING_ADVISE) { + /* + * If we're in precopy-advise mode we need to track received pages even + * though we don't need to place pages atomically yet. + * In advise mode there's only a single thread, so don't need locks + */ + set_bit(bitmap_index, mis->postcopy_pmi.received_map); + } +} + int postcopy_ram_hosttest(void) { /* TODO: Needs guarding with CONFIG_ once we have libc's that have the defs @@ -156,5 +369,12 @@ int postcopy_ram_hosttest(void) return -1; } +/* Called by ram_load prior to mapping the page */ +void postcopy_hook_early_receive(MigrationIncomingState *mis, + size_t bitmap_index) +{ + /* We don't support postcopy so don't care */ +} + #endif -- 1.9.3