On 06/01/2012 02:42 PM, Juan Quintela wrote: > Orit Wasserman <owass...@redhat.com> wrote: >> In the outgoing migration check to see if the page is cached and >> changed than send compressed page by using save_xbrle_page function. >> In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set >> and decompress the page (by using load_xbrle function). > > > This patch can be split to easy review. Sure. > >> --- a/arch_init.c >> +++ b/arch_init.c >> @@ -43,6 +43,15 @@ >> #include "hw/smbios.h" >> #include "exec-memory.h" >> #include "hw/pcspk.h" >> +#include "qemu/cache.h" >> + >> +#ifdef DEBUG_ARCH_INIT >> +#define DPRINTF(fmt, ...) \ >> + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0) >> +#else >> +#define DPRINTF(fmt, ...) \ >> + do { } while (0) >> +#endif > > Independent of xbzrle. > >> >> #ifdef TARGET_SPARC >> int graphic_width = 1024; >> @@ -94,6 +103,7 @@ const uint32_t arch_type = QEMU_ARCH; >> #define RAM_SAVE_FLAG_PAGE 0x08 >> #define RAM_SAVE_FLAG_EOS 0x10 >> #define RAM_SAVE_FLAG_CONTINUE 0x20 >> +#define RAM_SAVE_FLAG_XBZRLE 0x40 >> >> #ifdef __ALTIVEC__ >> #include <altivec.h> >> @@ -157,6 +167,22 @@ static int is_dup_page(uint8_t *page) >> return 1; >> } >> >> +/* XBZRLE (Xor Based Zero Length Encoding */ >> +typedef struct XBZRLEHeader { >> + uint32_t xh_cksum; > > We are still not using this value, and we are sending it anyway (with a > value of zero). What happens when we start using if for a checksum, and > we migration to a new version that "expects" it to be valid? I would > preffer not to sent it, or sent the correct value. I think I will remove it, checksum should be used for all migration not just XBZRLE. I guess we can add it to the protocol in the future. > >> + uint16_t xh_len; >> + uint8_t xh_flags; >> +} XBZRLEHeader; >> + >> +/* struct contains XBZRLE cache and a static page >> + used by the compression */ >> +static struct { >> + /* buffer used for XBZRLE encoding */ >> + uint8_t *encoded_buf; >> + /* Cache for XBZRLE */ >> + Cache *cache; >> +} XBZRLE = {0}; > > Use c99 initializers, please. > > { .encoded_buf = NULL, > .cache = NULL, > } > > More instances in other parts. > >> + >> static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, >> int cont, int flag) > > { >> @@ -169,19 +195,78 @@ static void save_block_hdr(QEMUFile *f, RAMBlock >> *block, ram_addr_t offset, >> >> } >> >> +#define ENCODING_FLAG_XBZRLE 0x1 >> + >> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, >> + ram_addr_t current_addr, RAMBlock *block, >> + ram_addr_t offset, int cont) >> +{ >> + int encoded_len = 0, bytes_sent = -1, ret = -1; >> + XBZRLEHeader hdr = {0}; >> + uint8_t *prev_cached_page; >> + >> + /* check to see if page is cached , if not cache and return */ >> + if (!cache_is_cached(XBZRLE.cache, current_addr)) { >> + cache_insert(XBZRLE.cache, current_addr, g_memdup(current_data, >> + >> TARGET_PAGE_SIZE)); >> + goto done; >> + } >> + >> + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); >> + >> + /* XBZRLE encoding (if there is no overflow) */ >> + encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data, >> + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, >> + TARGET_PAGE_SIZE); >> + if (encoded_len == 0) { >> + bytes_sent = 0; >> + DPRINTF("Unmodifed page or overflow skipping\n"); >> + goto done; >> + } else if (encoded_len == -1) { >> + bytes_sent = -1; >> + DPRINTF("Overflow\n"); >> + /* update data in the cache */ >> + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); >> + goto done; >> + } >> + >> + /* we need to update the data in the cache, in order to get the same >> data >> + we cached we decode the encoded page on the cached data */ >> + ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len, >> + prev_cached_page, TARGET_PAGE_SIZE); >> + g_assert(ret != -1); >> + >> + hdr.xh_len = encoded_len; >> + hdr.xh_flags |= ENCODING_FLAG_XBZRLE; >> + >> + /* Send XBZRLE based compressed page */ >> + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); >> + qemu_put_byte(f, hdr.xh_flags); >> + qemu_put_be16(f, hdr.xh_len); >> + qemu_put_be32(f, hdr.xh_cksum); >> + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); >> + bytes_sent = encoded_len + sizeof(hdr); >> + >> +done: >> + return bytes_sent; >> +} >> + >> static RAMBlock *last_block; >> static ram_addr_t last_offset; >> >> -static int ram_save_block(QEMUFile *f) >> +static int ram_save_block(QEMUFile *f, int stage) >> { >> RAMBlock *block = last_block; >> ram_addr_t offset = last_offset; >> - int bytes_sent = 0; >> + int bytes_sent = -1; >> MemoryRegion *mr; >> + ram_addr_t current_addr; >> >> if (!block) >> block = QLIST_FIRST(&ram_list.blocks); >> >> + current_addr = block->offset + offset; >> + >> do { >> mr = block->mr; >> if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE, >> @@ -198,7 +283,24 @@ static int ram_save_block(QEMUFile *f) >> save_block_hdr(f, block, offset, cont, >> RAM_SAVE_FLAG_COMPRESS); >> qemu_put_byte(f, *p); >> bytes_sent = 1; >> - } else { >> + } else if (migrate_use_xbzrle()) { >> + /* in stage 1 none of the pages are cached so we just want >> to >> + cache them for next stages, and send the cached copy */ >> + if (stage == 1) { >> + cache_insert(XBZRLE.cache, current_addr, >> + g_memdup(p, TARGET_PAGE_SIZE)); >> + } else { >> + bytes_sent = save_xbzrle_page(f, p, current_addr, block, >> + offset, cont); >> + } >> + /* send the cached page copy for stage 1 and 2*/ >> + if (stage != 3) { >> + p = get_cached_data(XBZRLE.cache, current_addr); >> + } >> + } >> + >> + /* either we didn't send yet (we may got XBZRLE overflow) */ >> + if (bytes_sent == -1) { >> save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); >> qemu_put_buffer(f, p, TARGET_PAGE_SIZE); >> bytes_sent = TARGET_PAGE_SIZE; > > > I think that code is not right when save_xbzrle_page() returns 0. That > means that page hasn't changed since last time we sent that page. We > shouldn't break in that case. Just continue with next page, right? > You are right I missed that , will be fixed
> On the other hand ... Why are we doing the stage == 1 test? stage 1 > normally only sent part of the pages, so we could use the generic code > there? It would just return -1 as bytes_sent, and do the same code that > we have now? we need to add the pages to the cache in stage 1 (for the next stage), and there is no need for checking if the page is cached. and send the pages from the cache for consistency > > The optimization for stage 3 is not done backwards? We are inserting > the page in the cache even if we are on stage 3. In stage three we > should: > - look if page is on the cache: do usual xbrlze trick > - if it is not, just sent the whole page without inserting it into the > cache? We are never going to reuse it, so putting it into the cache > would not help us at all. We are just making an extra copy? right no need to insert the page into the cache in stage 3, I will remove it > > >> >> qemu_put_be64(f, RAM_SAVE_FLAG_EOS); >> >> expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; >> >> + DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time, >> + migrate_max_downtime()); >> + > > This belongs to debugging patch. > >> + /* load data and decode */ >> + xbzrle_buf = g_malloc0(TARGET_PAGE_SIZE); > > can't we have a static buffer of that size, and avoid all the > malloc/free business? If space is tight, we can allways put it on the > xbrle structure and assign it only for migration. good idea > >> @@ -481,16 +657,33 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) >> void *host; >> >> host = host_from_stream_offset(f, addr, flags); >> + if (!host) { >> + return -EINVAL; >> + } > > Why is this check only needed now? I wish I knew, looks like it is missing in upstream. Do you think I should fix it separately ? Thanks, Orit > > Later, Juan.