The patch proposed below appears to have slipped by without any response; kindly provide feedback. Thanks, Aidan
> -----Original Message----- > From: qemu-devel-bounces+aidan.shribman=sap....@nongnu.org > [mailto:qemu-devel-bounces+aidan.shribman=sap....@nongnu.org] > On Behalf Of Shribman, Aidan > Sent: Sunday, May 22, 2011 3:01 PM > To: qemu-devel@nongnu.org > Subject: [Qemu-devel] [PATCH] XBRLE page delta compression > for live migration of large memory apps > > Subject: [PATCH] XBRLE page delta compression for live > migration of large memory apps > From: Aidan Shribman <aidan.shrib...@sap.com> > > By using XBRLE (Xor Based Run-Length-Encoding) we can reduce required > bandwidth for transfering of dirty memory pages during live migration > migrate_set_cachesize <size> > migrate -x <url> > Qemu host: Ubuntu 10.10 > Testing: live migration (w and w/o XBRLE) tested successfully. > > Signed-off-by: Benoit Hudzia <benoit.hud...@sap.com> > Signed-off-by: Petter Svard <pett...@cs.umu.se> > Signed-off-by: Aidan Shribman <aidan.shrib...@sap.com> > > --- > > arch_init.c | 647 > +++++++++++++++++++++++++++++++++++++++++++++++++---- > block-migration.c | 3 +- > hmp-commands.hx | 36 +++- > hw/hw.h | 3 +- > migration-exec.c | 6 +- > migration-fd.c | 6 +- > migration-tcp.c | 6 +- > migration-unix.c | 6 +- > migration.c | 33 +++- > migration.h | 23 ++- > qmp-commands.hx | 43 +++- > savevm.c | 13 +- > sysemu.h | 3 +- > 13 files changed, 749 insertions(+), 79 deletions(-) > > diff --git a/arch_init.c b/arch_init.c > index 4486925..069cd67 100644 > --- a/arch_init.c > +++ b/arch_init.c > @@ -27,6 +27,7 @@ > #include <sys/types.h> > #include <sys/mman.h> > #endif > +#include <assert.h> > #include "config.h" > #include "monitor.h" > #include "sysemu.h" > @@ -41,6 +42,24 @@ > #include "gdbstub.h" > #include "hw/smbios.h" > > +//#define DEBUG_ARCH_INIT > +#ifdef DEBUG_ARCH_INIT > +#define DPRINTF(fmt, ...) \ > + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); > } while (0) > +#else > +#define DPRINTF(fmt, ...) 
\ > + do { } while (0) > +#endif > + > +//#define DEBUG_ARCH_INIT_CKSUM > +#ifdef DEBUG_ARCH_INIT_CKSUM > +#define PAGE_LOG(addr, pdata, fmt, ...) \ > + do { page_log(addr, pdata, fmt, ## __VA_ARGS__); } while (0) > +#else > +#define PAGE_LOG(addr, pdata, fmt, ...) \ > + do { } while (0) > +#endif > + > #ifdef TARGET_SPARC > int graphic_width = 1024; > int graphic_height = 768; > @@ -88,6 +107,402 @@ const uint32_t arch_type = QEMU_ARCH; > #define RAM_SAVE_FLAG_PAGE 0x08 > #define RAM_SAVE_FLAG_EOS 0x10 > #define RAM_SAVE_FLAG_CONTINUE 0x20 > +#define RAM_SAVE_FLAG_XBRLE 0x40 > + > +/***********************************************************/ > +/* Page cache for storing previous pages as basis for XBRLE > compression */ > +#define CACHE_N_WAY 2 /* 2-way assossiative cache */ > + > +typedef struct CacheItem { > + ram_addr_t it_addr; > + unsigned long it_age; > + uint8_t *it_data; > +} CacheItem; > + > +typedef struct CacheBucket { > + CacheItem bkt_item[CACHE_N_WAY]; > +} CacheBucket; > + > +static CacheBucket *page_cache; > +static int64_t cache_num_buckets; > +static uint64_t cache_max_item_age; > +static int64_t cache_num_items; > + > +static void cache_init(ssize_t num_buckets); > +static void cache_fini(void); > +static int cache_is_cached(ram_addr_t addr); > +static int cache_get_oldest(CacheBucket *buck); > +static int cache_get_newest(CacheBucket *buck, ram_addr_t addr); > +static void cache_insert(ram_addr_t id, uint8_t *pdata); > +static unsigned long cache_get_cache_pos(ram_addr_t address); > +static CacheItem *cache_item_get(unsigned long pos, int item); > + > +/***********************************************************/ > +/* RAM Migration State */ > +typedef struct ArchMigrationState { > + int use_xbrle; > + int64_t xbrle_cache_size; > +} ArchMigrationState; > + > +static ArchMigrationState arch_mig_state; > + > +void arch_set_params(int blk_enable, int shared_base, int use_xbrle, > + int64_t xbrle_cache_size, void *opaque) > +{ > + 
arch_mig_state.use_xbrle = use_xbrle; > + arch_mig_state.xbrle_cache_size = xbrle_cache_size; > +} > + > +/***********************************************************/ > +/* XBRLE (Xor Based Run-Length Encoding) */ > +typedef struct XBRLEHeader { > + uint8_t xh_flags; > + uint16_t xh_len; > + uint32_t xh_cksum; > +} XBRLEHeader; > + > +static int rle_encode(uint8_t *src, int slen, uint8_t *dst, > int dlen); > +static int rle_decode(uint8_t *src, int slen, uint8_t *dst, > int dlen); > +#ifdef DEBUG_ARCH_INIT_CKSUM > +static uint32_t page_cksum(uint8_t *buf); > +static void page_log(ram_addr_t addr, uint8_t *pdata, const > char *fmt, ...); > +#endif > + > +/***********************************************************/ > +/* benchmarking */ > +typedef struct BenchInfo { > + uint64_t normal_pages; > + uint64_t xbrle_pages; > + uint64_t xbrle_bytes; > + uint64_t xbrle_pages_aborted; > + uint64_t dup_pages; > + uint64_t iterations; > +} BenchInfo; > + > +static BenchInfo bench; > + > +/***********************************************************/ > +/* XBRLE page cache implementation */ > +static CacheItem *cache_item_get(unsigned long pos, int item) > +{ > + assert(page_cache); > + return &page_cache[pos].bkt_item[item]; > +} > + > +#ifdef DEBUG_ARCH_INIT_CKSUM > +static int64_t cache_max_items(void) > +{ > + return cache_num_buckets * CACHE_N_WAY; > +} > +#endif /* DEBUG_ARCH_INIT_CKSUM */ > + > +static void cache_init(int64_t num_bytes) > +{ > + int i; > + > + cache_num_items = 0; > + cache_max_item_age = 0; > + cache_num_buckets = num_bytes / (TARGET_PAGE_SIZE * CACHE_N_WAY); > + assert(cache_num_buckets); > + DPRINTF("Setting cache buckets to %ld\n", cache_num_buckets); > + > + assert(!page_cache); > + page_cache = (CacheBucket *)qemu_mallocz((cache_num_buckets) * > + sizeof(CacheBucket)); > + > + for (i = 0; i < cache_num_buckets; i++) { > + int j; > + for (j = 0; j < CACHE_N_WAY; j++) { > + CacheItem *it = cache_item_get(i, j); > + it->it_data = NULL; > + it->it_age 
= 0; > + it->it_addr = -1; > + } > + } > +} > + > +static void cache_fini(void) > +{ > + int i; > + > + assert(page_cache); > + > + for (i = 0; i < cache_num_buckets; i++) { > + int j; > + for (j = 0; j < CACHE_N_WAY; j++) { > + CacheItem *it = cache_item_get(i, j); > + qemu_free(it->it_data); > + it->it_data = 0; > + } > + } > + > + qemu_free(page_cache); > + page_cache = NULL; > +} > + > +static unsigned long cache_get_cache_pos(ram_addr_t address) > +{ > + unsigned long pos; > + > + assert(cache_num_buckets); > + pos = (address/TARGET_PAGE_SIZE) & (cache_num_buckets - 1); > + return pos; > +} > + > +static int cache_get_newest(CacheBucket *buck, ram_addr_t addr) > +{ > + unsigned long big = 0; > + int big_pos = -1; > + int j; > + > + assert(page_cache); > + > + for (j = 0; j < CACHE_N_WAY; j++) { > + CacheItem *it = &buck->bkt_item[j]; > + > + if (it->it_addr != addr) { > + continue; > + } > + > + if (!j || it->it_age > big) { > + big = it->it_age; > + big_pos = j; > + } > + } > + > + return big_pos; > +} > + > +static int cache_get_oldest(CacheBucket *buck) > +{ > + unsigned long small = 0; > + int small_pos = -1; > + int j; > + > + assert(page_cache); > + > + for (j = 0; j < CACHE_N_WAY; j++) { > + CacheItem *it = &buck->bkt_item[j]; > + > + if (!j || it->it_age < small) { > + small = it->it_age; > + small_pos = j; > + } > + } > + > + return small_pos; > +} > + > +static int cache_is_cached(ram_addr_t addr) > +{ > + unsigned long pos = cache_get_cache_pos(addr); > + > + assert(page_cache); > + CacheBucket *bucket = &page_cache[pos]; > + return cache_get_newest(bucket, addr); > +} > + > +static void cache_insert(unsigned long addr, uint8_t *pdata) > +{ > + unsigned long pos; > + int slot = -1; > + CacheBucket *bucket; > + > + pos = cache_get_cache_pos(addr); > + assert(page_cache); > + bucket = &page_cache[pos]; > + slot = cache_get_oldest(bucket); /* evict LRU */ > + > + /* actual update of entry */ > + CacheItem *it = cache_item_get(pos, slot); > + if 
(!it->it_data) { > + cache_num_items++; > + } > + qemu_free(it->it_data); > + it->it_data = pdata; > + it->it_age = ++cache_max_item_age; > + it->it_addr = addr; > +} > + > +/* XBRLE (Xor Based Run-Length Encoding) */ > +static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen) > +{ > + int d = 0, ch_run = 0, i; > + uint8_t prev, ch; > + > + for (i = 0; i <= slen; i++) { > + if (i != slen) { > + ch = src[i]; > + } > + > + if (!i || (i != slen && ch == prev && ch_run < 255)) { > + ch_run++; > + } else { > + if (d+2 > dlen) > + return -1; > + *dst++ = ch_run; > + *dst++ = prev; > + d += 2; > + ch_run = 1; > + } > + > + prev = ch; > + } > + return d; > +} > + > +static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen) > +{ > + int d = 0, s; > + > + for (s = 0; s < slen-1; s += 2) { > + uint8_t ch_run = src[s]; > + uint8_t ch = src[s+1]; > + while (ch_run--) { > + if (d == dlen) { > + return -1; > + } > + dst[d] = ch; > + d++; > + } > + } > + return d; > +} > + > +#define PAGE_SAMPLE_PERCENT 0.02 > +#define PAGE_SAMPLE_SIZE (TARGET_PAGE_SIZE * PAGE_SAMPLE_PERCENT) > +#define BYTES_CHANGED_PERCENT 0.30 > + > +static int is_page_good_for_xbrle(uint8_t *old, uint8_t *new) > +{ > + int i, bytes_changed = 0; > + > + srand(time(NULL)+getpid()+getpid()*987654+rand()); > + > + for (i = 0; i < PAGE_SAMPLE_SIZE; i++) { > + unsigned long pos = (int) (rand() * TARGET_PAGE_SIZE > / (RAND_MAX+1.0)); > + > + if (old[pos] != new[pos]) { > + bytes_changed++; > + } > + } > + > + return (((float) bytes_changed) / PAGE_SAMPLE_SIZE) < > BYTES_CHANGED_PERCENT; > +} > + > +static void xor_encode(uint8_t *dst, uint8_t *src1, uint8_t *src2) > +{ > + int i; > + > + for (i = 0; i < TARGET_PAGE_SIZE; i++) { > + dst[i] = src1[i] ^ src2[i]; > + } > +} > + > +static void save_block_hdr(QEMUFile *f, > + RAMBlock *block, ram_addr_t offset, int cont, int flag) > +{ > + qemu_put_be64(f, offset | cont | flag); > + if (!cont) { > + qemu_put_byte(f, strlen(block->idstr)); > + 
qemu_put_buffer(f, (uint8_t *)block->idstr, > + strlen(block->idstr)); > + } > +} > + > +#define ENCODING_FLAG_XBRLE 0x1 > +#define ENCODING_FLAG_CKSUM 0x2 > + > +static int save_xbrle_page(QEMUFile *f, uint8_t *current_data, > + ram_addr_t current_addr, RAMBlock *block, ram_addr_t > offset, int cont) > +{ > + int cache_location = -1, slot = -1, encoded_len = 0, > bytes_sent = 0; > + XBRLEHeader hdr = {0}; > + CacheItem *it; > + uint8_t *xor_buf = NULL, *xbrle_buf = NULL; > + > + /* get location */ > + slot = cache_is_cached(current_addr); > + if (slot == -1) { > + goto done; > + } > + cache_location = cache_get_cache_pos(current_addr); > + > + /* abort if page changed too much */ > + it = cache_item_get(cache_location, slot); > + if (!is_page_good_for_xbrle(it->it_data, current_data)) { > + DPRINTF("Page changed too much! Aborting XBRLE.\n"); > + bench.xbrle_pages_aborted++; > + goto done; > + } > + > + /* XOR encoding */ > + xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE); > + xor_encode(xor_buf, it->it_data, current_data); > + > + /* XBRLE (XOR+RLE) encoding */ > + xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE); > + encoded_len = rle_encode(xor_buf, TARGET_PAGE_SIZE, xbrle_buf, > + TARGET_PAGE_SIZE); > + > + if (encoded_len < 0) { > + DPRINTF("XBRLE encoding oeverflow - sending uncompressed\n"); > + goto done; > + } > + > + hdr.xh_len = encoded_len; > + hdr.xh_flags |= ENCODING_FLAG_XBRLE; > +#ifdef DEBUG_ARCH_INIT_CKSUM > + hdr.xh_cksum = page_cksum(current_data); > + hdr.xh_flags |= ENCODING_FLAG_CKSUM; > +#endif > + > + /* Send XBRLE compressed page */ > + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBRLE); > + qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr)); > + qemu_put_buffer(f, xbrle_buf, encoded_len); > + PAGE_LOG(current_addr, current_data, "XBRLE page (enc len %d)\n", > + encoded_len); > + bench.xbrle_pages++; > + bytes_sent = encoded_len + sizeof(hdr); > + bench.xbrle_bytes += bytes_sent; > + > +done: > + qemu_free(xor_buf); > 
+ qemu_free(xbrle_buf); > + return bytes_sent; > +} > + > +#ifdef DEBUG_ARCH_INIT_CKSUM > +static uint32_t page_cksum(uint8_t *buf) > +{ > + uint32_t res = 0; > + int i; > + > + for (i = 0; i < TARGET_PAGE_SIZE; i++) { > + res += buf[i]; > + } > + > + return res; > +} > + > +static void page_log(ram_addr_t addr, uint8_t *pdata, const > char *fmt, ...) > +{ > + va_list arg_ptr; > + static FILE *fp; > + static uint32_t page_seq; > + > + va_start(arg_ptr, fmt); > + if (!fp) { > + fp = fopen("mig.log", "w"); > + } > + page_seq++; > + fprintf(fp, "[seq %d addr 0x%lX cksum 0x%X] ", page_seq, > + (unsigned long) addr, page_cksum(pdata)); > + vfprintf(fp, fmt, arg_ptr); > + va_end(arg_ptr); > +} > +#endif /* DEBUG_ARCH_INIT_CKSUM */ > > static int is_dup_page(uint8_t *page, uint8_t ch) > { > @@ -107,7 +522,7 @@ static int is_dup_page(uint8_t *page, uint8_t ch) > static RAMBlock *last_block; > static ram_addr_t last_offset; > > -static int ram_save_block(QEMUFile *f) > +static int ram_save_block(QEMUFile *f, int stage) > { > RAMBlock *block = last_block; > ram_addr_t offset = last_offset; > @@ -128,28 +543,32 @@ static int ram_save_block(QEMUFile *f) > current_addr + > TARGET_PAGE_SIZE, > MIGRATION_DIRTY_FLAG); > > - p = block->host + offset; > + p = qemu_mallocz(TARGET_PAGE_SIZE); > + memcpy(p, block->host + offset, TARGET_PAGE_SIZE); > > if (is_dup_page(p, *p)) { > - qemu_put_be64(f, offset | cont | > RAM_SAVE_FLAG_COMPRESS); > - if (!cont) { > - qemu_put_byte(f, strlen(block->idstr)); > - qemu_put_buffer(f, (uint8_t *)block->idstr, > - strlen(block->idstr)); > - } > + save_block_hdr(f, block, offset, cont, > RAM_SAVE_FLAG_COMPRESS); > qemu_put_byte(f, *p); > bytes_sent = 1; > - } else { > - qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE); > - if (!cont) { > - qemu_put_byte(f, strlen(block->idstr)); > - qemu_put_buffer(f, (uint8_t *)block->idstr, > - strlen(block->idstr)); > + bench.dup_pages++; > + PAGE_LOG(current_addr, p, "DUP page\n"); > + } else if (stage == 2 
&& arch_mig_state.use_xbrle) { > + bytes_sent = save_xbrle_page(f, p, > current_addr, block, > + offset, cont); > + if (bytes_sent) { > + cache_insert(current_addr, p); > } > + } > + if (!bytes_sent) { > + save_block_hdr(f, block, offset, cont, > RAM_SAVE_FLAG_PAGE); > qemu_put_buffer(f, p, TARGET_PAGE_SIZE); > bytes_sent = TARGET_PAGE_SIZE; > + bench.normal_pages++; > + PAGE_LOG(current_addr, p, "NORMAL page\n"); > + if (arch_mig_state.use_xbrle) { > + cache_insert(current_addr, p); > + } > } > - > break; > } > > @@ -212,6 +631,55 @@ uint64_t ram_bytes_total(void) > return total; > } > > +#ifdef DEBUG_ARCH_INIT > +static void dump_percentage(const char *label, unsigned long > absolute, > + unsigned long total) > +{ > + printf("%s: %ld (%0.2f%%)\n", label, absolute, > + (total ? (100.0 * absolute / total) : 0)); > +} > + > +static void dump_migration_statistics(void) > +{ > + unsigned long normal_bytes = bench.normal_pages * > TARGET_PAGE_SIZE; > + unsigned long total_pages = bench.normal_pages + > bench.xbrle_pages > + + bench.dup_pages; > + unsigned long total_bytes = normal_bytes + bench.xbrle_bytes > + + bench.dup_pages; > + > + printf("\n"); > + > printf("=====================================================\n"); > + printf("Save VM Memory Statistics (SUCCESS or FAILURE):\n"); > + printf("Iterations: %ld\n", bench.iterations); > + > + dump_percentage("Normal pages", bench.normal_pages, total_pages); > + dump_percentage("Normal bytes", normal_bytes, total_bytes); > + > + dump_percentage("Dup pages", bench.dup_pages, total_pages); > + dump_percentage("Dup bytes", bench.dup_pages, total_bytes); > + > + if (arch_mig_state.use_xbrle) { > + dump_percentage("XBRLE pages", bench.xbrle_pages, > total_pages); > + dump_percentage("XBRLE bytes", bench.xbrle_bytes, > total_bytes); > + dump_percentage("Aborted XBRLE pages from XBRLE", > + bench.xbrle_pages_aborted, > + bench.xbrle_pages + bench.xbrle_pages_aborted); > + } > + > + dump_percentage("Total pages", total_pages, 
total_pages); > + dump_percentage("Total bytes", total_bytes, total_bytes); > + > + if (arch_mig_state.use_xbrle) { > + printf("Cache number of inserts: %ld\n", cache_max_item_age); > + printf("Cache max items: %ld\n", cache_max_items()); > + dump_percentage("Cache number of items", cache_num_items, > + cache_max_items()); > + } > + > + > printf("=====================================================\n"); > +} > +#endif /* DEBUG_ARCH_INIT_CKSUM */ > + > int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque) > { > ram_addr_t addr; > @@ -235,6 +703,10 @@ int ram_save_live(Monitor *mon, QEMUFile > *f, int stage, void *opaque) > last_block = NULL; > last_offset = 0; > > + if (arch_mig_state.use_xbrle) { > + cache_init(arch_mig_state.xbrle_cache_size); > + } > + > /* Make sure all dirty bits are set */ > QLIST_FOREACH(block, &ram_list.blocks, next) { > for (addr = block->offset; addr < block->offset > + block->length; > @@ -264,8 +736,9 @@ int ram_save_live(Monitor *mon, QEMUFile > *f, int stage, void *opaque) > while (!qemu_file_rate_limit(f)) { > int bytes_sent; > > - bytes_sent = ram_save_block(f); > + bytes_sent = ram_save_block(f, stage); > bytes_transferred += bytes_sent; > + bench.iterations++; > if (bytes_sent == 0) { /* no more blocks */ > break; > } > @@ -285,19 +758,83 @@ int ram_save_live(Monitor *mon, > QEMUFile *f, int stage, void *opaque) > int bytes_sent; > > /* flush all remaining blocks regardless of rate limiting */ > - while ((bytes_sent = ram_save_block(f)) != 0) { > + while ((bytes_sent = ram_save_block(f, stage))) { > bytes_transferred += bytes_sent; > } > cpu_physical_memory_set_dirty_tracking(0); > + if (arch_mig_state.use_xbrle) { > + cache_fini(); > +#ifdef DEBUG_ARCH_INIT > + dump_migration_statistics(); > +#endif > + } > } > > qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; > > + DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", > expected_time, > + 
migrate_max_downtime()); > + > return (stage == 2) && (expected_time <= migrate_max_downtime()); > } > > +static int load_xbrle(QEMUFile *f, ram_addr_t addr, void *host) > +{ > + int ret, rc = -1; > + uint8_t *prev_page, *xor_buf, *xbrle_buf; > + XBRLEHeader hdr = {0}; > + > + /* extract RLE header */ > + qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr)); > + if (!(hdr.xh_flags & ENCODING_FLAG_XBRLE)) { > + fprintf(stderr, "Failed to load XBRLE page - wrong > compression!\n"); > + goto done; > + } > + > + if (hdr.xh_len > TARGET_PAGE_SIZE) { > + fprintf(stderr, "Failed to load XBRLE page - len > overflow!\n"); > + goto done; > + } > + > + /* load data and decode */ > + xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE); > + qemu_get_buffer(f, xbrle_buf, hdr.xh_len); > + > + /* decode RLE */ > + xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE); > + ret = rle_decode(xbrle_buf, hdr.xh_len, xor_buf, > TARGET_PAGE_SIZE); > + if (ret == -1) { > + fprintf(stderr, "Failed to load XBRLE page - decode > error!\n"); > + goto done; > + } > + > + if (ret != TARGET_PAGE_SIZE) { > + fprintf(stderr, "Failed to load XBRLE page - size %d > expected %d!\n", > + ret, TARGET_PAGE_SIZE); > + goto done; > + } > + > + /* decode XOR delta */ > + prev_page = host; > + xor_encode(prev_page, prev_page, xor_buf); > +#ifdef DEBUG_ARCH_INIT_CKSUM > + if (hdr.xh_flags & ENCODING_FLAG_CKSUM && > + hdr.xh_cksum != page_cksum(prev_page)) { > + fprintf(stderr, "Failed to load XBRLE page - bad > checksum!\n"); > + goto done; > + } > +#endif > + > + PAGE_LOG(addr, prev_page, "XBRLE page (enc len %d)\n", > hdr.xh_len); > + rc = 0; > +done: > + qemu_free(xor_buf); > + qemu_free(xbrle_buf); > + return rc; > +} > + > static inline void *host_from_stream_offset(QEMUFile *f, > ram_addr_t offset, > int flags) > @@ -328,16 +865,38 @@ static inline void > *host_from_stream_offset(QEMUFile *f, > return NULL; > } > > +static inline void *host_from_stream_offset_versioned(int version_id, > + QEMUFile *f, 
ram_addr_t offset, int flags) > +{ > + void *host; > + if (version_id == 3) { > + host = qemu_get_ram_ptr(offset); > + } else { > + host = host_from_stream_offset(f, offset, flags); > + } > + if (!host) { > + fprintf(stderr, "Failed to convert RAM address to host" > + " for offset 0x%lX!\n", offset); > + abort(); > + } > + return host; > +} > + > int ram_load(QEMUFile *f, void *opaque, int version_id) > { > ram_addr_t addr; > - int flags; > + int flags, ret = 0; > + static uint64_t seq_iter; > + > + seq_iter++; > > if (version_id < 3 || version_id > 4) { > - return -EINVAL; > + ret = -EINVAL; > + goto done; > } > > do { > + void *host; > addr = qemu_get_be64(f); > > flags = addr & ~TARGET_PAGE_MASK; > @@ -346,7 +905,8 @@ int ram_load(QEMUFile *f, void *opaque, > int version_id) > if (flags & RAM_SAVE_FLAG_MEM_SIZE) { > if (version_id == 3) { > if (addr != ram_bytes_total()) { > - return -EINVAL; > + ret = -EINVAL; > + goto done; > } > } else { > /* Synchronize RAM block list */ > @@ -365,8 +925,10 @@ int ram_load(QEMUFile *f, void *opaque, > int version_id) > > QLIST_FOREACH(block, &ram_list.blocks, next) { > if (!strncmp(id, block->idstr, sizeof(id))) { > - if (block->length != length) > - return -EINVAL; > + if (block->length != length) { > + ret = -EINVAL; > + goto done; > + } > break; > } > } > @@ -374,7 +936,8 @@ int ram_load(QEMUFile *f, void *opaque, > int version_id) > if (!block) { > fprintf(stderr, "Unknown ramblock > \"%s\", cannot " > "accept migration\n", id); > - return -EINVAL; > + ret = -EINVAL; > + goto done; > } > > total_ram_bytes -= length; > @@ -383,17 +946,10 @@ int ram_load(QEMUFile *f, void *opaque, > int version_id) > } > > if (flags & RAM_SAVE_FLAG_COMPRESS) { > - void *host; > uint8_t ch; > > - if (version_id == 3) > - host = qemu_get_ram_ptr(addr); > - else > - host = host_from_stream_offset(f, addr, flags); > - if (!host) { > - return -EINVAL; > - } > - > + host = host_from_stream_offset_versioned(version_id, > + f, addr, flags); > ch = 
qemu_get_byte(f); > memset(host, ch, TARGET_PAGE_SIZE); > #ifndef _WIN32 > @@ -402,22 +958,31 @@ int ram_load(QEMUFile *f, void *opaque, > int version_id) > qemu_madvise(host, TARGET_PAGE_SIZE, > QEMU_MADV_DONTNEED); > } > #endif > + PAGE_LOG(addr, host, "DUP page\n"); > } else if (flags & RAM_SAVE_FLAG_PAGE) { > - void *host; > - > - if (version_id == 3) > - host = qemu_get_ram_ptr(addr); > - else > - host = host_from_stream_offset(f, addr, flags); > - > + host = host_from_stream_offset_versioned(version_id, > + f, addr, flags); > qemu_get_buffer(f, host, TARGET_PAGE_SIZE); > + PAGE_LOG(addr, host, "NORMAL page\n"); > + } else if (flags & RAM_SAVE_FLAG_XBRLE) { > + host = host_from_stream_offset_versioned(version_id, > + f, addr, flags); > + if (load_xbrle(f, addr, host) < 0) { > + ret = -EINVAL; > + goto done; > + } > } > + > if (qemu_file_has_error(f)) { > - return -EIO; > + ret = -EIO; > + goto done; > } > } while (!(flags & RAM_SAVE_FLAG_EOS)); > > - return 0; > +done: > + DPRINTF("Completed load of VM with exit code %d seq > iteration %ld\n", > + ret, seq_iter); > + return ret; > } > > void qemu_service_io(void) > diff --git a/block-migration.c b/block-migration.c > index 3e66f49..004fc12 100644 > --- a/block-migration.c > +++ b/block-migration.c > @@ -689,7 +689,8 @@ static int block_load(QEMUFile *f, void > *opaque, int version_id) > return 0; > } > > -static void block_set_params(int blk_enable, int > shared_base, void *opaque) > +static void block_set_params(int blk_enable, int shared_base, > + int use_xbrle, int64_t xbrle_cache_size, void *opaque) > { > block_mig_state.blk_enable = blk_enable; > block_mig_state.shared_base = shared_base; > diff --git a/hmp-commands.hx b/hmp-commands.hx > index e5585ba..e49d5be 100644 > --- a/hmp-commands.hx > +++ b/hmp-commands.hx > @@ -717,24 +717,27 @@ ETEXI > > { > .name = "migrate", > - .args_type = "detach:-d,blk:-b,inc:-i,uri:s", > - .params = "[-d] [-b] [-i] uri", > - .help = "migrate to URI (using -d to not wait 
> for completion)" > - "\n\t\t\t -b for migration without > shared storage with" > - " full copy of disk\n\t\t\t -i for > migration without " > - "shared storage with incremental copy of disk " > - "(base image shared between src and > destination)", > + .args_type = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s", > + .params = "[-d] [-b] [-i] [-x] uri", > + .help = "migrate to URI" > + "\n\t -d to not wait for completion" > + "\n\t -b for migration without shared > storage with" > + " full copy of disk" > + "\n\t -i for migration without" > + " shared storage with incremental copy of disk" > + " (base image shared between source > and destination)" > + "\n\t -x to use XBRLE page delta compression", > .user_print = monitor_user_noop, > .mhandler.cmd_new = do_migrate, > }, > > - > STEXI > -@item migrate [-d] [-b] [-i] @var{uri} > +@item migrate [-d] [-b] [-i] [-x] @var{uri} > @findex migrate > Migrate to @var{uri} (using -d to not wait for completion). > -b for migration with full copy of disk > -i for migration with incremental copy of disk (base > image is shared) > + -x to use XBRLE page delta compression > ETEXI > > { > @@ -753,10 +756,23 @@ Cancel the current VM migration. > ETEXI > > { > + .name = "migrate_set_cachesize", > + .args_type = "value:s", > + .params = "value", > + .help = "set cache size (in MB) for XBRLE migrations", > + .mhandler.cmd = do_migrate_set_cachesize, > + }, > + > +STEXI > +@item migrate_set_cachesize @var{value} > +Set cache size (in MB) for xbrle migrations. > +ETEXI > + > + { > .name = "migrate_set_speed", > .args_type = "value:o", > .params = "value", > - .help = "set maximum speed (in bytes) for migrations. " > + .help = "set maximum XBRLE cache size (in > bytes) for migrations. " > "Defaults to MB if no size suffix is specified, ie. 
> B/K/M/G/T", > .user_print = monitor_user_noop, > .mhandler.cmd_new = do_migrate_set_speed, > diff --git a/hw/hw.h b/hw/hw.h > index 9d2cfc2..de9e5a6 100644 > --- a/hw/hw.h > +++ b/hw/hw.h > @@ -239,7 +239,8 @@ static inline void > qemu_get_sbe64s(QEMUFile *f, int64_t *pv) > int64_t qemu_ftell(QEMUFile *f); > int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence); > > -typedef void SaveSetParamsHandler(int blk_enable, int > shared, void * opaque); > +typedef void SaveSetParamsHandler(int blk_enable, int shared, > + int use_xbrle, int64_t xbrle_cache_size, void * opaque); > typedef void SaveStateHandler(QEMUFile *f, void *opaque); > typedef int SaveLiveStateHandler(Monitor *mon, QEMUFile *f, > int stage, > void *opaque); > diff --git a/migration-exec.c b/migration-exec.c > index 14718dd..fe8254a 100644 > --- a/migration-exec.c > +++ b/migration-exec.c > @@ -67,7 +67,9 @@ MigrationState > *exec_start_outgoing_migration(Monitor *mon, > int64_t bandwidth_limit, > int detach, > int blk, > - int inc) > + int inc, > + int use_xbrle, > + int64_t xbrle_cache_size) > { > FdMigrationState *s; > FILE *f; > @@ -99,6 +101,8 @@ MigrationState > *exec_start_outgoing_migration(Monitor *mon, > > s->mig_state.blk = blk; > s->mig_state.shared = inc; > + s->mig_state.use_xbrle = use_xbrle; > + s->mig_state.xbrle_cache_size = xbrle_cache_size; > > s->state = MIG_STATE_ACTIVE; > s->mon = NULL; > diff --git a/migration-fd.c b/migration-fd.c > index 6d14505..4a1ddbd 100644 > --- a/migration-fd.c > +++ b/migration-fd.c > @@ -56,7 +56,9 @@ MigrationState > *fd_start_outgoing_migration(Monitor *mon, > int64_t bandwidth_limit, > int detach, > int blk, > - int inc) > + int inc, > + int use_xbrle, > + int64_t xbrle_cache_size) > { > FdMigrationState *s; > > @@ -82,6 +84,8 @@ MigrationState > *fd_start_outgoing_migration(Monitor *mon, > > s->mig_state.blk = blk; > s->mig_state.shared = inc; > + s->mig_state.use_xbrle = use_xbrle; > + s->mig_state.xbrle_cache_size = xbrle_cache_size; > > 
s->state = MIG_STATE_ACTIVE; > s->mon = NULL; > diff --git a/migration-tcp.c b/migration-tcp.c > index b55f419..4ca5bf6 100644 > --- a/migration-tcp.c > +++ b/migration-tcp.c > @@ -81,7 +81,9 @@ MigrationState > *tcp_start_outgoing_migration(Monitor *mon, > int64_t bandwidth_limit, > int detach, > int blk, > - int inc) > + int inc, > + int use_xbrle, > + int64_t xbrle_cache_size) > { > struct sockaddr_in addr; > FdMigrationState *s; > @@ -101,6 +103,8 @@ MigrationState > *tcp_start_outgoing_migration(Monitor *mon, > > s->mig_state.blk = blk; > s->mig_state.shared = inc; > + s->mig_state.use_xbrle = use_xbrle; > + s->mig_state.xbrle_cache_size = xbrle_cache_size; > > s->state = MIG_STATE_ACTIVE; > s->mon = NULL; > diff --git a/migration-unix.c b/migration-unix.c > index 57232c0..0813902 100644 > --- a/migration-unix.c > +++ b/migration-unix.c > @@ -80,7 +80,9 @@ MigrationState > *unix_start_outgoing_migration(Monitor *mon, > int64_t bandwidth_limit, > int detach, > int blk, > - int inc) > + int inc, > + int use_xbrle, > + int64_t xbrle_cache_size) > { > FdMigrationState *s; > struct sockaddr_un addr; > @@ -100,6 +102,8 @@ MigrationState > *unix_start_outgoing_migration(Monitor *mon, > > s->mig_state.blk = blk; > s->mig_state.shared = inc; > + s->mig_state.use_xbrle = use_xbrle; > + s->mig_state.xbrle_cache_size = xbrle_cache_size; > > s->state = MIG_STATE_ACTIVE; > s->mon = NULL; > diff --git a/migration.c b/migration.c > index 9ee8b17..b5b530b 100644 > --- a/migration.c > +++ b/migration.c > @@ -34,6 +34,9 @@ > /* Migration speed throttling */ > static uint32_t max_throttle = (32 << 20); > > +/* Migration XBRLE cache size */ > +static int64_t migrate_cache_size = 0x8000000; /* 256 MB size */ > + > static MigrationState *current_migration; > > int qemu_start_incoming_migration(const char *uri) > @@ -80,6 +83,7 @@ int do_migrate(Monitor *mon, const QDict > *qdict, QObject **ret_data) > int detach = qdict_get_try_bool(qdict, "detach", 0); > int blk = 
qdict_get_try_bool(qdict, "blk", 0); > int inc = qdict_get_try_bool(qdict, "inc", 0); > + int use_xbrle = qdict_get_try_bool(qdict, "xbrle", 0); > const char *uri = qdict_get_str(qdict, "uri"); > > if (current_migration && > @@ -90,17 +94,21 @@ int do_migrate(Monitor *mon, const QDict > *qdict, QObject **ret_data) > > if (strstart(uri, "tcp:", &p)) { > s = tcp_start_outgoing_migration(mon, p, > max_throttle, detach, > - blk, inc); > + blk, inc, use_xbrle, > + migrate_cache_size); > #if !defined(WIN32) > } else if (strstart(uri, "exec:", &p)) { > s = exec_start_outgoing_migration(mon, p, > max_throttle, detach, > - blk, inc); > + blk, inc, use_xbrle, > + migrate_cache_size); > } else if (strstart(uri, "unix:", &p)) { > s = unix_start_outgoing_migration(mon, p, > max_throttle, detach, > - blk, inc); > + blk, inc, use_xbrle, > + migrate_cache_size); > } else if (strstart(uri, "fd:", &p)) { > s = fd_start_outgoing_migration(mon, p, max_throttle, detach, > - blk, inc); > + blk, inc, use_xbrle, > + migrate_cache_size); > #endif > } else { > monitor_printf(mon, "unknown migration protocol: %s\n", uri); > @@ -341,7 +349,8 @@ void migrate_fd_connect(FdMigrationState *s) > > DPRINTF("beginning savevm\n"); > ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk, > - s->mig_state.shared); > + s->mig_state.shared, > s->mig_state.use_xbrle, > + s->mig_state.xbrle_cache_size); > if (ret < 0) { > DPRINTF("failed, %d\n", ret); > migrate_fd_error(s); > @@ -448,3 +457,17 @@ int migrate_fd_close(void *opaque) > qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL); > return s->close(s); > } > + > +void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict) > +{ > + ssize_t bytes; > + char *ptr; > + const char *value = qdict_get_str(qdict, "value"); > + > + bytes = strtod(value, &ptr); > + if (bytes > 0) { > + migrate_cache_size = bytes; > + monitor_printf(mon, "Cache size set to %ld bytes\n", bytes); > + } > +} > + > diff --git a/migration.h b/migration.h > index 
d13ed4f..62be2f8 100644 > --- a/migration.h > +++ b/migration.h > @@ -32,6 +32,8 @@ struct MigrationState > void (*release)(MigrationState *s); > int blk; > int shared; > + int use_xbrle; > + int64_t xbrle_cache_size; > }; > > typedef struct FdMigrationState FdMigrationState; > @@ -76,7 +78,9 @@ MigrationState > *exec_start_outgoing_migration(Monitor *mon, > int64_t bandwidth_limit, > int detach, > int blk, > - int inc); > + int inc, > + int use_xbrle, > + int64_t xbrle_cache_size); > > int tcp_start_incoming_migration(const char *host_port); > > @@ -85,7 +89,9 @@ MigrationState > *tcp_start_outgoing_migration(Monitor *mon, > int64_t bandwidth_limit, > int detach, > int blk, > - int inc); > + int inc, > + int use_xbrle, > + int64_t xbrle_cache_size); > > int unix_start_incoming_migration(const char *path); > > @@ -94,7 +100,9 @@ MigrationState > *unix_start_outgoing_migration(Monitor *mon, > int64_t bandwidth_limit, > int detach, > int blk, > - int inc); > + int inc, > + int use_xbrle, > + int64_t xbrle_cache_size); > > int fd_start_incoming_migration(const char *path); > > @@ -103,7 +111,9 @@ MigrationState > *fd_start_outgoing_migration(Monitor *mon, > int64_t bandwidth_limit, > int detach, > int blk, > - int inc); > + int inc, > + int use_xbrle, > + int64_t xbrle_cache_size); > > void migrate_fd_monitor_suspend(FdMigrationState *s, Monitor *mon); > > @@ -134,4 +144,9 @@ static inline FdMigrationState > *migrate_to_fms(MigrationState *mig_state) > return container_of(mig_state, FdMigrationState, mig_state); > } > > +void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict); > + > +void arch_set_params(int blk_enable, int shared_base, > + int use_xbrle, int64_t xbrle_cache_size, void *opaque); > + > #endif > diff --git a/qmp-commands.hx b/qmp-commands.hx > index 793cf1c..8fbe64b 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -431,13 +431,16 @@ EQMP > > { > .name = "migrate", > - .args_type = "detach:-d,blk:-b,inc:-i,uri:s", > - .params = "[-d] 
[-b] [-i] uri", > - .help = "migrate to URI (using -d to not wait > for completion)" > - "\n\t\t\t -b for migration without > shared storage with" > - " full copy of disk\n\t\t\t -i for > migration without " > - "shared storage with incremental copy of disk " > - "(base image shared between src and > destination)", > + .args_type = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s", > + .params = "[-d] [-b] [-i] [-x] uri", > + .help = "migrate to URI" > + "\n\t -d to not wait for completion" > + "\n\t -b for migration without shared > storage with" > + " full copy of disk" > + "\n\t -i for migration without" > + " shared storage with incremental copy of disk" > + " (base image shared between source > and destination)" > + "\n\t -x to use XBRLE page delta compression", > .user_print = monitor_user_noop, > .mhandler.cmd_new = do_migrate, > }, > @@ -453,6 +456,7 @@ Arguments: > - "blk": block migration, full disk copy (json-bool, optional) > - "inc": incremental disk copy (json-bool, optional) > - "uri": Destination URI (json-string) > +- "xbrle": to use XBRLE page delta compression > > Example: > > @@ -494,6 +498,31 @@ Example: > EQMP > > { > + .name = "migrate_set_cachesize", > + .args_type = "value:s", > + .params = "value", > + .help = "set cache size (in MB) for xbrle migrations", > + .mhandler.cmd = do_migrate_set_cachesize, > + }, > + > +SQMP > +migrate_set_cachesize > +--------------------- > + > +Set cache size to be used by XBRLE migration > + > +Arguments: > + > +- "value": cache size in bytes (json-number) > + > +Example: > + > +-> { "execute": "migrate_set_cachesize", "arguments": { > "value": 500M } } > +<- { "return": {} } > + > +EQMP > + > + { > .name = "migrate_set_speed", > .args_type = "value:f", > .params = "value", > diff --git a/savevm.c b/savevm.c > index 4e49765..93b512b 100644 > --- a/savevm.c > +++ b/savevm.c > @@ -1141,7 +1141,8 @@ int register_savevm(DeviceState *dev, > void *opaque) > { > return register_savevm_live(dev, idstr, instance_id, 
version_id, > - NULL, NULL, save_state, > load_state, opaque); > + arch_set_params, NULL, save_state, > + load_state, opaque); > } > > void unregister_savevm(DeviceState *dev, const char *idstr, > void *opaque) > @@ -1428,15 +1429,17 @@ static int vmstate_save(QEMUFile *f, > SaveStateEntry *se) > #define QEMU_VM_SUBSECTION 0x05 > > int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int > blk_enable, > - int shared) > + int shared, int use_xbrle, > + int64_t xbrle_cache_size) > { > SaveStateEntry *se; > > QTAILQ_FOREACH(se, &savevm_handlers, entry) { > if(se->set_params == NULL) { > continue; > - } > - se->set_params(blk_enable, shared, se->opaque); > + } > + se->set_params(blk_enable, shared, use_xbrle, > xbrle_cache_size, > + se->opaque); > } > > qemu_put_be32(f, QEMU_VM_FILE_MAGIC); > @@ -1577,7 +1580,7 @@ static int qemu_savevm_state(Monitor > *mon, QEMUFile *f) > > bdrv_flush_all(); > > - ret = qemu_savevm_state_begin(mon, f, 0, 0); > + ret = qemu_savevm_state_begin(mon, f, 0, 0, 0, 0); > if (ret < 0) > goto out; > > diff --git a/sysemu.h b/sysemu.h > index b81a70e..15a0664 100644 > --- a/sysemu.h > +++ b/sysemu.h > @@ -74,7 +74,8 @@ void qemu_announce_self(void); > void main_loop_wait(int nonblocking); > > int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int > blk_enable, > - int shared); > + int shared, int use_xbrle, > + int64_t xbrle_cache_size); > int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f); > int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f); > void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f); > > > >