On 07/03/2013 14:32, Michael S. Tsirkin wrote:
> +#ifdef DEBUG_ARCH_INIT
> +#define DPRINTF(fmt, ...) \
> +    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
These need to be adjusted, but it can be a follow-up.

Paolo

> +#else
> +#define DPRINTF(fmt, ...) \
> +    do { } while (0)
> +#endif
> +
> +/***********************************************************/
> +/* ram save/restore */
> +
> +#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
> +#define RAM_SAVE_FLAG_COMPRESS 0x02
> +#define RAM_SAVE_FLAG_MEM_SIZE 0x04
> +#define RAM_SAVE_FLAG_PAGE     0x08
> +#define RAM_SAVE_FLAG_EOS      0x10
> +#define RAM_SAVE_FLAG_CONTINUE 0x20
> +#define RAM_SAVE_FLAG_XBZRLE   0x40
> +
> +#ifdef __ALTIVEC__
> +#include <altivec.h>
> +#define VECTYPE        vector unsigned char
> +#define SPLAT(p)       vec_splat(vec_ld(0, p), 0)
> +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
> +/* altivec.h may redefine the bool macro as vector type.
> + * Reset it to POSIX semantics. */
> +#undef bool
> +#define bool _Bool
> +#elif defined __SSE2__
> +#include <emmintrin.h>
> +#define VECTYPE        __m128i
> +#define SPLAT(p)       _mm_set1_epi8(*(p))
> +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
> +#else
> +#define VECTYPE        unsigned long
> +#define SPLAT(p)       (*(p) * (~0UL / 255))
> +#define ALL_EQ(v1, v2) ((v1) == (v2))
> +#endif
> +
> +static int is_dup_page(uint8_t *page)
> +{
> +    VECTYPE *p = (VECTYPE *)page;
> +    VECTYPE val = SPLAT(page);
> +    int i;
> +
> +    for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
> +        if (!ALL_EQ(val, p[i])) {
> +            return 0;
> +        }
> +    }
> +
> +    return 1;
> +}
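
A side note on the portable SPLAT() fallback, since it looks like magic at
first sight: ~0UL / 255 is 0x0101...01, so multiplying it by a byte value
replicates that byte into every byte of the word. A minimal standalone
sketch (mine, not part of the patch) demonstrating the identity:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        uint8_t b = 0xab;
        /* same expression as the scalar SPLAT(p) above */
        unsigned long splat = b * (~0UL / 255);
        unsigned long expected;

        /* memset produces the byte-replicated word we expect */
        memset(&expected, b, sizeof(expected));
        assert(splat == expected);
        return 0;
    }

is_dup_page() then only has to compare that splatted word against the rest
of the page, one VECTYPE at a time.
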
> +
> +/* struct contains XBZRLE cache and a static page
> +   used by the compression */
> +static struct {
> +    /* buffer used for XBZRLE encoding */
> +    uint8_t *encoded_buf;
> +    /* buffer for storing page content */
> +    uint8_t *current_buf;
> +    /* buffer used for XBZRLE decoding */
> +    uint8_t *decoded_buf;
> +    /* Cache for XBZRLE */
> +    PageCache *cache;
> +} XBZRLE = {
> +    .encoded_buf = NULL,
> +    .current_buf = NULL,
> +    .decoded_buf = NULL,
> +    .cache = NULL,
> +};
> +
> +
> +int64_t xbzrle_cache_resize(int64_t new_size)
> +{
> +    if (XBZRLE.cache != NULL) {
> +        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
> +            TARGET_PAGE_SIZE;
> +    }
> +    return pow2floor(new_size);
> +}
> +
> +/* accounting for migration statistics */
> +typedef struct AccountingInfo {
> +    uint64_t dup_pages;
> +    uint64_t norm_pages;
> +    uint64_t iterations;
> +    uint64_t xbzrle_bytes;
> +    uint64_t xbzrle_pages;
> +    uint64_t xbzrle_cache_miss;
> +    uint64_t xbzrle_overflows;
> +} AccountingInfo;
> +
> +static AccountingInfo acct_info;
> +
> +static void acct_clear(void)
> +{
> +    memset(&acct_info, 0, sizeof(acct_info));
> +}
> +
> +uint64_t dup_mig_bytes_transferred(void)
> +{
> +    return acct_info.dup_pages * TARGET_PAGE_SIZE;
> +}
> +
> +uint64_t dup_mig_pages_transferred(void)
> +{
> +    return acct_info.dup_pages;
> +}
> +
> +uint64_t norm_mig_bytes_transferred(void)
> +{
> +    return acct_info.norm_pages * TARGET_PAGE_SIZE;
> +}
> +
> +uint64_t norm_mig_pages_transferred(void)
> +{
> +    return acct_info.norm_pages;
> +}
> +
> +uint64_t xbzrle_mig_bytes_transferred(void)
> +{
> +    return acct_info.xbzrle_bytes;
> +}
> +
> +uint64_t xbzrle_mig_pages_transferred(void)
> +{
> +    return acct_info.xbzrle_pages;
> +}
> +
> +uint64_t xbzrle_mig_pages_cache_miss(void)
> +{
> +    return acct_info.xbzrle_cache_miss;
> +}
> +
> +uint64_t xbzrle_mig_pages_overflow(void)
> +{
> +    return acct_info.xbzrle_overflows;
> +}
> +
> +static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
> +                             int cont, int flag)
> +{
> +    size_t size;
> +
> +    qemu_put_be64(f, offset | cont | flag);
> +    size = 8;
> +
> +    if (!cont) {
> +        qemu_put_byte(f, strlen(block->idstr));
> +        qemu_put_buffer(f, (uint8_t *)block->idstr,
> +                        strlen(block->idstr));
> +        size += 1 + strlen(block->idstr);
> +    }
> +    return size;
> +}
> +
> +#define ENCODING_FLAG_XBZRLE 0x1
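
The offset | cont | flag packing in save_block_hdr() deserves a comment in
the code eventually: it only works because page offsets are
TARGET_PAGE_SIZE aligned, so the low bits are free to carry the
RAM_SAVE_FLAG_* bits, and the load side can split them apart again with
TARGET_PAGE_MASK. A standalone sketch of the round trip (illustrative
only; 4096 stands in for TARGET_PAGE_SIZE):

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096
    #define PAGE_MASK (~(uint64_t)(PAGE_SIZE - 1))

    int main(void)
    {
        uint64_t offset = 42 * PAGE_SIZE;     /* always page aligned */
        uint64_t flag = 0x08;                 /* e.g. RAM_SAVE_FLAG_PAGE */
        uint64_t hdr = offset | flag;

        assert((hdr & PAGE_MASK) == offset);  /* address survives */
        assert((hdr & ~PAGE_MASK) == flag);   /* flags recovered */
        return 0;
    }
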
> +
> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
> +                            ram_addr_t current_addr, RAMBlock *block,
> +                            ram_addr_t offset, int cont, bool last_stage)
> +{
> +    int encoded_len = 0, bytes_sent = -1;
> +    uint8_t *prev_cached_page;
> +
> +    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
> +        if (!last_stage) {
> +            cache_insert(XBZRLE.cache, current_addr, current_data);
> +        }
> +        acct_info.xbzrle_cache_miss++;
> +        return -1;
> +    }
> +
> +    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
> +
> +    /* save current buffer into memory */
> +    memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE);
> +
> +    /* XBZRLE encoding (if there is no overflow) */
> +    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
> +                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
> +                                       TARGET_PAGE_SIZE);
> +    if (encoded_len == 0) {
> +        DPRINTF("Skipping unmodified page\n");
> +        return 0;
> +    } else if (encoded_len == -1) {
> +        DPRINTF("Overflow\n");
> +        acct_info.xbzrle_overflows++;
> +        /* update data in the cache */
> +        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
> +        return -1;
> +    }
> +
> +    /* we need to update the data in the cache, in order to get the same data */
> +    if (!last_stage) {
> +        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
> +    }
> +
> +    /* Send XBZRLE based compressed page */
> +    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
> +    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
> +    qemu_put_be16(f, encoded_len);
> +    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
> +    bytes_sent += encoded_len + 1 + 2;
> +    acct_info.xbzrle_pages++;
> +    acct_info.xbzrle_bytes += bytes_sent;
> +
> +    return bytes_sent;
> +}
> +
> +
> +/* This is the last block that we have visited searching for dirty pages
> + */
> +static RAMBlock *last_seen_block;
> +/* This is the last block from where we have sent data */
> +static RAMBlock *last_sent_block;
> +static ram_addr_t last_offset;
> +static unsigned long *migration_bitmap;
> +static uint64_t migration_dirty_pages;
> +static uint32_t last_version;
> +
> +static inline
> +ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
> +                                                 ram_addr_t start)
> +{
> +    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
> +    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
> +    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
> +
> +    unsigned long next = find_next_bit(migration_bitmap, size, nr);
> +
> +    if (next < size) {
> +        clear_bit(next, migration_bitmap);
> +        migration_dirty_pages--;
> +    }
> +    return (next - base) << TARGET_PAGE_BITS;
> +}
> +
> +static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
> +                                              ram_addr_t offset)
> +{
> +    bool ret;
> +    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
> +
> +    ret = test_and_set_bit(nr, migration_bitmap);
> +
> +    if (!ret) {
> +        migration_dirty_pages++;
> +    }
> +    return ret;
> +}
> +
> +/* Needs iothread lock!
> + */
> +
> +static void migration_bitmap_sync(void)
> +{
> +    RAMBlock *block;
> +    ram_addr_t addr;
> +    uint64_t num_dirty_pages_init = migration_dirty_pages;
> +    MigrationState *s = migrate_get_current();
> +    static int64_t start_time;
> +    static int64_t num_dirty_pages_period;
> +    int64_t end_time;
> +
> +    if (!start_time) {
> +        start_time = qemu_get_clock_ms(rt_clock);
> +    }
> +
> +    trace_migration_bitmap_sync_start();
> +    memory_global_sync_dirty_bitmap(get_system_memory());
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
> +            if (memory_region_test_and_clear_dirty(block->mr,
> +                                                   addr, TARGET_PAGE_SIZE,
> +                                                   DIRTY_MEMORY_MIGRATION)) {
> +                migration_bitmap_set_dirty(block->mr, addr);
> +            }
> +        }
> +    }
> +    trace_migration_bitmap_sync_end(migration_dirty_pages
> +                                    - num_dirty_pages_init);
> +    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
> +    end_time = qemu_get_clock_ms(rt_clock);
> +
> +    /* more than 1 second = 1000 milliseconds */
> +    if (end_time > start_time + 1000) {
> +        s->dirty_pages_rate = num_dirty_pages_period * 1000
> +            / (end_time - start_time);
> +        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
> +        start_time = end_time;
> +        num_dirty_pages_period = 0;
> +    }
> +}
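
The dirty_pages_rate computed here is what the migration core compares
against the available bandwidth to decide whether the iterative phase can
ever converge. A sketch of that relationship (my illustration, not code
from this patch; the names are made up and a nonzero bandwidth is
assumed):

    #include <stdbool.h>
    #include <stdint.h>

    /* the guest must dirty memory more slowly than we can send it,
       otherwise ram_save_iterate() never catches up */
    static bool sketch_can_converge(uint64_t bandwidth_bytes_per_sec,
                                    uint64_t dirty_bytes_rate)
    {
        return bandwidth_bytes_per_sec > dirty_bytes_rate;
    }

    /* once the VM is stopped, the residual dirty set is flushed in one
       go; this is the downtime the user actually experiences */
    static uint64_t sketch_expected_downtime_ms(uint64_t remaining_bytes,
                                                uint64_t bandwidth_bytes_per_sec)
    {
        return remaining_bytes * 1000 / bandwidth_bytes_per_sec;
    }
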
> +
> +/*
> + * ram_save_block: Writes a page of memory to the stream f
> + *
> + * Returns:  The number of bytes written.
> + *           0 means no dirty pages
> + */
> +
> +static int ram_save_block(QEMUFile *f, bool last_stage)
> +{
> +    RAMBlock *block = last_seen_block;
> +    ram_addr_t offset = last_offset;
> +    bool complete_round = false;
> +    int bytes_sent = 0;
> +    MemoryRegion *mr;
> +    ram_addr_t current_addr;
> +
> +    if (!block)
> +        block = QTAILQ_FIRST(&ram_list.blocks);
> +
> +    while (true) {
> +        mr = block->mr;
> +        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
> +        if (complete_round && block == last_seen_block &&
> +            offset >= last_offset) {
> +            break;
> +        }
> +        if (offset >= block->length) {
> +            offset = 0;
> +            block = QTAILQ_NEXT(block, next);
> +            if (!block) {
> +                block = QTAILQ_FIRST(&ram_list.blocks);
> +                complete_round = true;
> +            }
> +        } else {
> +            uint8_t *p;
> +            int cont = (block == last_sent_block) ?
> +                RAM_SAVE_FLAG_CONTINUE : 0;
> +
> +            p = memory_region_get_ram_ptr(mr) + offset;
> +
> +            /* When in doubt, send the page as normal */
> +            bytes_sent = -1;
> +            if (is_dup_page(p)) {
> +                acct_info.dup_pages++;
> +                bytes_sent = save_block_hdr(f, block, offset, cont,
> +                                            RAM_SAVE_FLAG_COMPRESS);
> +                qemu_put_byte(f, *p);
> +                bytes_sent += 1;
> +            } else if (migrate_use_xbzrle()) {
> +                current_addr = block->offset + offset;
> +                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
> +                                              offset, cont, last_stage);
> +                if (!last_stage) {
> +                    p = get_cached_data(XBZRLE.cache, current_addr);
> +                }
> +            }
> +
> +            /* XBZRLE overflow or normal page */
> +            if (bytes_sent == -1) {
> +                bytes_sent = save_block_hdr(f, block, offset, cont,
> +                                            RAM_SAVE_FLAG_PAGE);
> +                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
> +                bytes_sent += TARGET_PAGE_SIZE;
> +                acct_info.norm_pages++;
> +            }
> +
> +            /* if page is unmodified, continue to the next */
> +            if (bytes_sent > 0) {
> +                last_sent_block = block;
> +                break;
> +            }
> +        }
> +    }
> +    last_seen_block = block;
> +    last_offset = offset;
> +
> +    return bytes_sent;
> +}
> +
> +static uint64_t bytes_transferred;
> +
> +static ram_addr_t ram_save_remaining(void)
> +{
> +    return migration_dirty_pages;
> +}
> +
> +uint64_t ram_bytes_remaining(void)
> +{
> +    return ram_save_remaining() * TARGET_PAGE_SIZE;
> +}
> +
> +uint64_t ram_bytes_transferred(void)
> +{
> +    return bytes_transferred;
> +}
> +
> +uint64_t ram_bytes_total(void)
> +{
> +    RAMBlock *block;
> +    uint64_t total = 0;
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next)
> +        total += block->length;
> +
> +    return total;
> +}
> +
> +static void migration_end(void)
> +{
> +    if (migration_bitmap) {
> +        memory_global_dirty_log_stop();
> +        g_free(migration_bitmap);
> +        migration_bitmap = NULL;
> +    }
> +
> +    if (XBZRLE.cache) {
> +        cache_fini(XBZRLE.cache);
> +        g_free(XBZRLE.cache);
> +        g_free(XBZRLE.encoded_buf);
> +        g_free(XBZRLE.current_buf);
> +        g_free(XBZRLE.decoded_buf);
> +        XBZRLE.cache = NULL;
> +    }
> +}
> +
> +static void ram_migration_cancel(void *opaque)
> +{
> +    migration_end();
> +}
> +
> +static void reset_ram_globals(void)
> +{
> +    last_seen_block = NULL;
> +    last_sent_block = NULL;
> +    last_offset = 0;
> +    last_version = ram_list.version;
> +}
> +
> +#define MAX_WAIT 50 /* ms, half buffered_file limit */
> +
> +static int ram_save_setup(QEMUFile *f, void *opaque)
> +{
> +    RAMBlock *block;
> +    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> +
> +    migration_bitmap = bitmap_new(ram_pages);
> +    bitmap_set(migration_bitmap, 0, ram_pages);
> +    migration_dirty_pages = ram_pages;
> +
> +    if (migrate_use_xbzrle()) {
> +        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
> +                                  TARGET_PAGE_SIZE,
> +                                  TARGET_PAGE_SIZE);
> +        if (!XBZRLE.cache) {
> +            DPRINTF("Error creating cache\n");
> +            return -1;
> +        }
> +        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
> +        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
> +        acct_clear();
> +    }
> +
> +    qemu_mutex_lock_iothread();
> +    qemu_mutex_lock_ramlist();
> +    bytes_transferred = 0;
> +    reset_ram_globals();
> +
> +    memory_global_dirty_log_start();
> +    migration_bitmap_sync();
> +    qemu_mutex_unlock_iothread();
> +
> +    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +        qemu_put_byte(f, strlen(block->idstr));
> +        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
> +        qemu_put_be64(f, block->length);
> +    }
> +
> +    qemu_mutex_unlock_ramlist();
> +    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> +
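
Note that ram_save_setup() seeds the migration bitmap with every page
dirty, so the first pass over RAM sends everything and later passes only
send what the sync above re-dirtied. A toy standalone sketch of that
drain discipline on a 64-page "bitmap" (my illustration only, using a GCC
builtin in place of find_next_bit()):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* bitmap_set(migration_bitmap, 0, ram_pages): all pages dirty */
        uint64_t bitmap = ~UINT64_C(0);
        unsigned dirty = 64;

        /* like migration_bitmap_find_and_reset_dirty(): find the lowest
           set bit, clear it, decrement the dirty-page counter */
        while (bitmap) {
            unsigned next = __builtin_ctzll(bitmap);
            bitmap &= ~(UINT64_C(1) << next);
            dirty--;
        }
        assert(dirty == 0);   /* the first full round visits every page */
        return 0;
    }
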
> +    return 0;
> +}
> +
> +static int ram_save_iterate(QEMUFile *f, void *opaque)
> +{
> +    int ret;
> +    int i;
> +    int64_t t0;
> +    int total_sent = 0;
> +
> +    qemu_mutex_lock_ramlist();
> +
> +    if (ram_list.version != last_version) {
> +        reset_ram_globals();
> +    }
> +
> +    t0 = qemu_get_clock_ns(rt_clock);
> +    i = 0;
> +    while ((ret = qemu_file_rate_limit(f)) == 0) {
> +        int bytes_sent;
> +
> +        bytes_sent = ram_save_block(f, false);
> +        /* no more blocks to send */
> +        if (bytes_sent == 0) {
> +            break;
> +        }
> +        total_sent += bytes_sent;
> +        acct_info.iterations++;
> +        /* we want to check in the 1st loop, just in case it was the 1st time
> +           and we had to sync the dirty bitmap.
> +           qemu_get_clock_ns() is a bit expensive, so we only check every
> +           few iterations
> +        */
> +        if ((i & 63) == 0) {
> +            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
> +            if (t1 > MAX_WAIT) {
> +                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
> +                        t1, i);
> +                break;
> +            }
> +        }
> +        i++;
> +    }
> +
> +    qemu_mutex_unlock_ramlist();
> +
> +    if (ret < 0) {
> +        bytes_transferred += total_sent;
> +        return ret;
> +    }
> +
> +    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> +    total_sent += 8;
> +    bytes_transferred += total_sent;
> +
> +    return total_sent;
> +}
> +
> +static int ram_save_complete(QEMUFile *f, void *opaque)
> +{
> +    qemu_mutex_lock_ramlist();
> +    migration_bitmap_sync();
> +
> +    /* try transferring iterative blocks of memory */
> +
> +    /* flush all remaining blocks regardless of rate limiting */
> +    while (true) {
> +        int bytes_sent;
> +
> +        bytes_sent = ram_save_block(f, true);
> +        /* no more blocks to send */
> +        if (bytes_sent == 0) {
> +            break;
> +        }
> +        bytes_transferred += bytes_sent;
> +    }
> +    migration_end();
> +
> +    qemu_mutex_unlock_ramlist();
> +    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> +
> +    return 0;
> +}
> +
> +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
> +{
> +    uint64_t remaining_size;
> +
> +    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
> +
> +    if (remaining_size < max_size) {
> +        qemu_mutex_lock_iothread();
> +        migration_bitmap_sync();
> +        qemu_mutex_unlock_iothread();
> +        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
> +    }
> +    return remaining_size;
> +}
> +
> +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
> +{
> +    int ret, rc = 0;
> +    unsigned int xh_len;
> +    int xh_flags;
> +
> +    if (!XBZRLE.decoded_buf) {
> +        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
> +    }
> +
> +    /* extract RLE header */
> +    xh_flags = qemu_get_byte(f);
> +    xh_len = qemu_get_be16(f);
> +
> +    if (xh_flags != ENCODING_FLAG_XBZRLE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
> +        return -1;
> +    }
> +
> +    if (xh_len > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
> +        return -1;
> +    }
> +    /* load data and decode */
> +    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
> +
> +    /* decode RLE */
> +    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
> +                               TARGET_PAGE_SIZE);
> +    if (ret == -1) {
> +        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
> +        rc = -1;
> +    } else if (ret > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
> +                ret, TARGET_PAGE_SIZE);
> +        abort();
> +    }
> +
> +    return rc;
> +}
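
For readers new to XBZRLE: the encoder diffs the page against the cached
previous version and only ships the changed runs. A much simplified
standalone sketch of the idea (this is *not* the actual
xbzrle_encode_buffer() wire format, which uses variable-length run
encodings; it only illustrates the <skip, copy> structure and the
overflow case accounted above):

    #include <stdint.h>
    #include <string.h>

    static int sketch_delta_encode(const uint8_t *prev, const uint8_t *cur,
                                   int slen, uint8_t *dst, int dlen)
    {
        int i = 0, d = 0;

        while (i < slen) {
            uint8_t skip = 0, copy = 0;

            /* run of bytes identical to the cached page */
            while (i + skip < slen && prev[i + skip] == cur[i + skip] &&
                   skip < 255) {
                skip++;
            }
            i += skip;
            /* run of bytes that changed */
            while (i + copy < slen && prev[i + copy] != cur[i + copy] &&
                   copy < 255) {
                copy++;
            }
            if (d + 2 + copy > dlen) {
                return -1;   /* delta larger than a page: send it raw */
            }
            dst[d++] = skip;
            dst[d++] = copy;
            memcpy(dst + d, cur + i, copy);
            d += copy;
            i += copy;
        }
        return d;
    }
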
> +
> +static inline void *host_from_stream_offset(QEMUFile *f,
> +                                            ram_addr_t offset,
> +                                            int flags)
> +{
> +    static RAMBlock *block = NULL;
> +    char id[256];
> +    uint8_t len;
> +
> +    if (flags & RAM_SAVE_FLAG_CONTINUE) {
> +        if (!block) {
> +            fprintf(stderr, "Ack, bad migration stream!\n");
> +            return NULL;
> +        }
> +
> +        return memory_region_get_ram_ptr(block->mr) + offset;
> +    }
> +
> +    len = qemu_get_byte(f);
> +    qemu_get_buffer(f, (uint8_t *)id, len);
> +    id[len] = 0;
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +        if (!strncmp(id, block->idstr, sizeof(id)))
> +            return memory_region_get_ram_ptr(block->mr) + offset;
> +    }
> +
> +    fprintf(stderr, "Can't find block %s!\n", id);
> +    return NULL;
> +}
> +
> +static int ram_load(QEMUFile *f, void *opaque, int version_id)
> +{
> +    ram_addr_t addr;
> +    int flags, ret = 0;
> +    int error;
> +    static uint64_t seq_iter;
> +
> +    seq_iter++;
> +
> +    if (version_id < 4 || version_id > 4) {
> +        return -EINVAL;
> +    }
> +
> +    do {
> +        addr = qemu_get_be64(f);
> +
> +        flags = addr & ~TARGET_PAGE_MASK;
> +        addr &= TARGET_PAGE_MASK;
> +
> +        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
> +            if (version_id == 4) {
> +                /* Synchronize RAM block list */
> +                char id[256];
> +                ram_addr_t length;
> +                ram_addr_t total_ram_bytes = addr;
> +
> +                while (total_ram_bytes) {
> +                    RAMBlock *block;
> +                    uint8_t len;
> +
> +                    len = qemu_get_byte(f);
> +                    qemu_get_buffer(f, (uint8_t *)id, len);
> +                    id[len] = 0;
> +                    length = qemu_get_be64(f);
> +
> +                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +                        if (!strncmp(id, block->idstr, sizeof(id))) {
> +                            if (block->length != length) {
> +                                ret = -EINVAL;
> +                                goto done;
> +                            }
> +                            break;
> +                        }
> +                    }
> +
> +                    if (!block) {
> +                        fprintf(stderr, "Unknown ramblock \"%s\", cannot "
> +                                "accept migration\n", id);
> +                        ret = -EINVAL;
> +                        goto done;
> +                    }
> +
> +                    total_ram_bytes -= length;
> +                }
> +            }
> +        }
> +
> +        if (flags & RAM_SAVE_FLAG_COMPRESS) {
> +            void *host;
> +            uint8_t ch;
> +
> +            host = host_from_stream_offset(f, addr, flags);
> +            if (!host) {
> +                return -EINVAL;
> +            }
> +
> +            ch = qemu_get_byte(f);
> +            memset(host, ch, TARGET_PAGE_SIZE);
> +#ifndef _WIN32
> +            if (ch == 0 &&
> +                (!kvm_enabled() || kvm_has_sync_mmu()) &&
> +                getpagesize() <= TARGET_PAGE_SIZE) {
> +                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
> +            }
> +#endif
> +        } else if (flags & RAM_SAVE_FLAG_PAGE) {
> +            void *host;
> +
> +            host = host_from_stream_offset(f, addr, flags);
> +            if (!host) {
> +                return -EINVAL;
> +            }
> +
> +            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> +        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
> +            void *host = host_from_stream_offset(f, addr, flags);
> +            if (!host) {
> +                return -EINVAL;
> +            }
> +
> +            if (load_xbzrle(f, addr, host) < 0) {
> +                ret = -EINVAL;
> +                goto done;
> +            }
> +        }
> +        error = qemu_file_get_error(f);
> +        if (error) {
> +            ret = error;
> +            goto done;
> +        }
> +    } while (!(flags & RAM_SAVE_FLAG_EOS));
> +
> +done:
> +    DPRINTF("Completed load of VM with exit code %d seq iteration "
> +            "%" PRIu64 "\n", ret, seq_iter);
> +    return ret;
> +}
> +
> +SaveVMHandlers savevm_ram_handlers = {
> +    .save_live_setup = ram_save_setup,
> +    .save_live_iterate = ram_save_iterate,
> +    .save_live_complete = ram_save_complete,
> +    .save_live_pending = ram_save_pending,
> +    .load_state = ram_load,
> +    .cancel = ram_migration_cancel,
> +};
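
For anyone following along: the handlers table at the end is what ties
this file into the generic migration code. If I remember correctly the
matching registration stays on the vl.c side and looks roughly like the
following (quoting from memory, so double-check the instance and version
arguments against the tree):

    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);

The version_id of 4 there is what the check at the top of ram_load()
enforces.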