From: Bohdan Trach <bohdan.tr...@mailbox.tu-dresden.de> This commit adds functions, which are used to work with checkpoint files. A new command-line option `-checkpoint` is added, which is used to specify the checkpoint file. Currently, MD5 function from OpenSSL is used to checkpoint memory.
Signed-off-by: Bohdan Trach <bohdan.tr...@mailbox.tu-dresden.de> --- arch_init.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ configure | 2 + qemu-options.hx | 9 ++++ vl.c | 12 +++++ 4 files changed, 172 insertions(+) diff --git a/arch_init.c b/arch_init.c index b8a4fb1..eda86d4 100644 --- a/arch_init.c +++ b/arch_init.c @@ -27,6 +27,7 @@ #ifndef _WIN32 #include <sys/types.h> #include <sys/mman.h> +#include <openssl/md5.h> #endif #include "config.h" #include "monitor/monitor.h" @@ -140,6 +141,8 @@ static struct defconfig_file { static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE]; +int fd_checkpoint = -1; + int qemu_read_default_config_files(bool userconfig) { int ret; @@ -184,6 +187,30 @@ static void XBZRLE_cache_lock(void) qemu_mutex_lock(&XBZRLE.lock); } +#ifdef DEBUG_ARCH_INIT +static char* md5s(const uint8_t *digest) { + /* MD5 is 16 bytes, i.e., 32 hexadecimal digits. + 1 for trailing \0. */ + static const size_t size = 32 + 1; + static char hex_digits[32+1]; + + /* snprintf(hex_digits, */ + /* MD5_DIGEST_LENGTH+1, */ + /* "%016lx", */ + /* *((uint64_t*)digest)); */ + /* snprintf(hex_digits+MD5_DIGEST_LENGTH, */ + /* MD5_DIGEST_LENGTH+1, */ + /* "%016lx", */ + /* *((uint64_t*)(digest+sizeof(uint64_t)))); */ + int digit; + for (digit = 0; digit < 32; digit += 2) { + snprintf(hex_digits+digit, 3, "%02x", digest[digit/2]); + } + + hex_digits[size-1] = '\0'; + return hex_digits; +} +#endif + static void XBZRLE_cache_unlock(void) { if (migrate_use_xbzrle()) @@ -603,6 +630,126 @@ static void migration_bitmap_sync(void) } } +int uint128_compare(const void*, const void*); +int uint128_compare(const void* x, const void* y) +{ + return memcmp(x, y, MD5_DIGEST_LENGTH); +} + +/* indexed by page number */ +static uint64_t hashes_size = 0; +static uint64_t hashes_entries = 0; +static uint8_t *hashes = 0; + +static uint32_t get_page_nr(uint64_t addr) { + assert((addr % TARGET_PAGE_SIZE) == 0); + return (addr / TARGET_PAGE_SIZE); +} + +typedef struct { + uint8_t hash[MD5_DIGEST_LENGTH]; + uint64_t offset; +} hash_offset_entry; + +static uint64_t hash_offset_entries = 0; +static uint64_t max_hash_offset_entries; +static hash_offset_entry* hash_offset_array = 0; +static uint8_t all_zeroes_hash[MD5_DIGEST_LENGTH]; + +int cmp_hash_offset_entry(const void*, const void*); +int cmp_hash_offset_entry(const void* a, const void* b) { + hash_offset_entry* e = (hash_offset_entry*) a; + hash_offset_entry* f = (hash_offset_entry*) b; + + return memcmp(e->hash, f->hash, MD5_DIGEST_LENGTH); +} + +void veecycle_init(void); +void veecycle_init(void) { + RAMBlock *block; + block = QLIST_FIRST_RCU(&ram_list.blocks); + if (block == NULL) + return; + assert(0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram"))); + + max_hash_offset_entries = hashes_entries = (ram_size / TARGET_PAGE_SIZE); + DPRINTF("pages=%lu\n", hashes_entries); + hashes_size = hashes_entries * MD5_DIGEST_LENGTH; + + hashes = g_malloc(hashes_size); + assert(0 != hashes); + bzero(hashes, hashes_size); + + uint8_t all_zeroes[TARGET_PAGE_SIZE]; + bzero(all_zeroes, TARGET_PAGE_SIZE); + MD5(all_zeroes, TARGET_PAGE_SIZE, all_zeroes_hash); + + hash_offset_array = g_malloc(max_hash_offset_entries * sizeof(hash_offset_entry)); + assert(0 != hash_offset_array); + bzero(hash_offset_array, max_hash_offset_entries * sizeof(hash_offset_entry)); +} + +void init_checksum_lookup_table(const char *checkpoint_path); +void init_checksum_lookup_table(const char *checkpoint_path) { + int rc; + uint8_t* ram; + RAMBlock *block; + + DPRINTF("ram_size=%lu\n", ram_size); + + struct stat sb; + rc = stat(checkpoint_path, &sb); + if (rc == -1 && errno == ENOENT) return; + assert(0 == rc); + + block = QLIST_FIRST_RCU(&ram_list.blocks); + assert(0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram"))); + ram = block->host; + assert(block->used_length == ram_size); + + /* Ignore checkpoint file if size is different from VM's current memory size. */ + assert(sb.st_size == ram_size); + + fd_checkpoint = open(checkpoint_path, O_RDWR); + assert(fd_checkpoint != -1); + + uint64_t idx; + for (idx=0; idx<ram_size; idx+=TARGET_PAGE_SIZE) { + rc = read(fd_checkpoint, ram+idx, TARGET_PAGE_SIZE); + assert(rc == TARGET_PAGE_SIZE); + assert(hash_offset_entries < max_hash_offset_entries); + MD5((unsigned char*)(ram+idx), + TARGET_PAGE_SIZE, + (unsigned char*)hash_offset_array[hash_offset_entries].hash); + + hash_offset_array[hash_offset_entries].offset = idx; + + DPRINTF("hash=%s offset=%08lx\n", + md5s(hash_offset_array[hash_offset_entries].hash), + hash_offset_array[hash_offset_entries].offset); + + hash_offset_entries++; + }; + + qsort(hash_offset_array, hash_offset_entries, sizeof(hash_offset_entry), + cmp_hash_offset_entry); +} + +int is_ram_addr(void* host); +int is_ram_addr(void* host) { + static RAMBlock *block = NULL; + + block = QLIST_FIRST_RCU(&ram_list.blocks); + assert(0 == strncmp(block->mr->name, "pc.ram", strlen("pc.ram"))); + + return (host >= memory_region_get_ram_ptr(block->mr) && + host < memory_region_get_ram_ptr(block->mr) + block->used_length); +} + +static int is_outgoing_with_checkpoint(void) { + return (fd_checkpoint != -1); +} + void qmp_dump_pc_ram(const char *file, Error **errp) { int rc; @@ -869,6 +1016,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque) bitmap_sync_count = 0; migration_bitmap_sync_init(); + qsort(hashes, hashes_entries, MD5_DIGEST_LENGTH, uint128_compare); + if (migrate_use_xbzrle()) { XBZRLE_cache_lock(); XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / diff --git a/configure b/configure index 6969f6f..fd6dd23 100755 --- a/configure +++ b/configure @@ -337,6 +337,8 @@ vhdx="" quorum="" numa="" +LIBS="-lcrypto" + # parse CC options first for opt do optarg=`expr "x$opt" : 'x[^=]*=\(.*\)'` diff --git a/qemu-options.hx b/qemu-options.hx index 319d971..ece4758 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -268,6 +268,15 @@ If @var{slots} and @var{maxmem} are not specified, memory hotplug won't be enabled and the guest startup RAM will never increase. ETEXI +DEF("checkpoint", HAS_ARG, QEMU_OPTION_checkpoint, + "-checkpoint file path to checkpoint file\n", QEMU_ARCH_ALL) +STEXI +@item -checkpoint @var{path} +@findex -checkpoint +Checkpoint file to use during incoming migrations. +Reduces network traffic and total migration time. +ETEXI + DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath, "-mem-path FILE provide backing storage for guest RAM\n", QEMU_ARCH_ALL) STEXI diff --git a/vl.c b/vl.c index 74c2681..d423e99 100644 --- a/vl.c +++ b/vl.c @@ -134,6 +134,7 @@ int display_opengl; static int display_remote; const char* keyboard_layout = NULL; ram_addr_t ram_size; +const char *checkpoint_path = NULL; const char *mem_path = NULL; int mem_prealloc = 0; /* force preallocation of physical target memory */ bool enable_mlock = false; @@ -2643,6 +2644,9 @@ out: return 0; } +void init_checksum_lookup_table(const char *checkpoint_path); +void veecycle_init(void); + static void set_memory_options(uint64_t *ram_slots, ram_addr_t *maxram_size) { uint64_t sz; @@ -3116,6 +3120,9 @@ int main(int argc, char **argv, char **envp) } break; #endif + case QEMU_OPTION_checkpoint: + checkpoint_path = optarg; + break; case QEMU_OPTION_mempath: mem_path = optarg; break; @@ -4331,6 +4338,7 @@ int main(int argc, char **argv, char **envp) } } + veecycle_init(); qdev_prop_check_globals(); if (vmstate_dump_file) { /* dump and exit */ @@ -4339,6 +4347,10 @@ int main(int argc, char **argv, char **envp) } if (incoming) { + if (checkpoint_path) { + init_checksum_lookup_table(checkpoint_path); + } + Error *local_err = NULL; qemu_start_incoming_migration(incoming, &local_err); if (local_err) { -- 2.0.5