Il 11/03/2013 05:33, michael.r.hines.mrhi...@linux.vnet.ibm.com ha scritto: > From: "Michael R. Hines" <mrhi...@us.ibm.com> > > For performance reasons, dup_page() and xbzrle() is skipped because > they are too expensive for zero-copy RDMA. > > Signed-off-by: Michael R. Hines <mrhi...@us.ibm.com> > --- > arch_init.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 56 insertions(+), 1 deletion(-) > > diff --git a/arch_init.c b/arch_init.c > index 8daeafa..437cb47 100644 > --- a/arch_init.c > +++ b/arch_init.c > @@ -45,6 +45,7 @@ > #include "exec/address-spaces.h" > #include "hw/pcspk.h" > #include "migration/page_cache.h" > +#include "migration/rdma.h" > #include "qemu/config-file.h" > #include "qmp-commands.h" > #include "trace.h" > @@ -245,6 +246,18 @@ uint64_t norm_mig_pages_transferred(void) > return acct_info.norm_pages; > } > > +/* > + * RDMA does not use the buffered_file, > + * but we still need a way to do accounting... > + */ > +uint64_t delta_norm_mig_bytes_transferred(void) > +{ > + static uint64_t last_norm_pages = 0; > + uint64_t delta_bytes = (acct_info.norm_pages - last_norm_pages) * > TARGET_PAGE_SIZE; > + last_norm_pages = acct_info.norm_pages; > + return delta_bytes; > +} > + > uint64_t xbzrle_mig_bytes_transferred(void) > { > return acct_info.xbzrle_bytes; > @@ -282,6 +295,45 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock > *block, ram_addr_t offset, > return size; > } > > +static size_t save_rdma_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset, > + int cont) > +{ > + int ret; > + size_t bytes_sent = 0; > + ram_addr_t current_addr; > + RDMAData * rdma = &migrate_get_current()->rdma; > + > + acct_info.norm_pages++; > + > + /* > + * use RDMA to send page > + */
Not quite true: the page is added to the current chunk. Please make the comments a quick-and-dirty reference for the protocol, or leave them out altogether. > + current_addr = block->offset + offset; > + if ((ret = qemu_rdma_write(rdma, current_addr, TARGET_PAGE_SIZE)) < 0) { > + fprintf(stderr, "rdma migration: write error! %d\n", ret); > + qemu_file_set_error(f, ret); > + return ret; > + } > + > + /* > + * do some polling > + */ Again, that's quite self-evident. Poll for what, though? :) > + while (1) { > + int ret = qemu_rdma_poll(rdma); > + if (ret == RDMA_WRID_NONE) { > + break; > + } > + if (ret < 0) { > + fprintf(stderr, "rdma migration: polling error! %d\n", ret); > + qemu_file_set_error(f, ret); > + return ret; > + } > + } > + > + bytes_sent += TARGET_PAGE_SIZE; > + return bytes_sent; > +} As written in the other message, I think this should be an additional QEMUFile operation, hopefully the same one that Orit is introducing in her patches. > #define ENCODING_FLAG_XBZRLE 0x1 > > static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, > @@ -462,7 +514,10 @@ static int ram_save_block(QEMUFile *f, bool last_stage) > > /* In doubt sent page as normal */ > bytes_sent = -1; > - if (is_dup_page(p)) { > + if (migrate_use_rdma()) { > + /* searching for zeros is still too expensive for RDMA */ > + bytes_sent = save_rdma_page(f, block, offset, cont); Again, as written in the other message, this is not really an RDMA thing; it's mostly the effect of a fast link. Of course to some extent it depends on the CPU and RAM speed, but we can pretend that it doesn't. > + } else if (is_dup_page(p)) { > acct_info.dup_pages++; > bytes_sent = save_block_hdr(f, block, offset, cont, > RAM_SAVE_FLAG_COMPRESS); > Thanks, Paolo