Il 11/02/2013 23:49, Michael R. Hines ha scritto: > > + /* > + * RFC RDMA: The empirical cost of searching for zero pages here > + * plus the cost of communicating with the other side > + * seems to take significantly more time than simply > + * dumping the page into remote memory. > + */ > + if (migrate_rdma_enabled()) > + return 0;
This is probably the only if (rdma) that should remain in the end. :) > for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) { > if (!ALL_EQ(val, p[i])) { > return 0; > @@ -282,6 +292,44 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock > *block, ram_addr_t offset, > return size; > } > > +static size_t save_rdma_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset, > + int cont) > +{ > + size_t bytes_sent = 0; > + ram_addr_t current_addr; > + > + acct_info.norm_pages++; > + > + /* > + * use RDMA to send page > + */ > + current_addr = block->offset + offset; > + if (rdma_write(&rdma_mdata, current_addr, > + TARGET_PAGE_SIZE)) { > + fprintf(stderr, "rdma migration: write error!\n"); > + qemu_file_set_error(f, -EIO); > + return 0; > + } > + > + /* > + * do some polling > + */ > + while (1) { > + int ret = rdma_poll(&rdma_mdata); > + if (ret == RDMA_WRID_NONE) { > + break; > + } > + if (ret < 0) { > + fprintf(stderr, "rdma migration: polling error!\n"); > + qemu_file_set_error(f, -EIO); > + return 0; > + } > + } > + > + bytes_sent += TARGET_PAGE_SIZE; > + return bytes_sent; > +} Orit, can you rebase and post an RFC of your vectored-send patches for TCP migration? Perhaps you and Michael can figure out an API that works well for both TCP and RDMA. > +#ifdef RDMA_EXTRA_SYNC > + /* > + * We use two "sync" infiniband messages during migration. > + * One at the beginning and one at the end, just to be thorough. > + * This is the first one. 
> + */ > + if (first_time && migrate_rdma_enabled()) { > + int r; > + first_time = 0; > + if (rdma_post_send_sync(&rdma_mdata, RDMA_WRID_SEND_EXTRA_SYNC)) > { > + fprintf(stderr, > + "rdma migration: error posting extra send sync!\n"); > + return -EIO; > + } > + > + r = rdma_wait_for_wrid(&rdma_mdata, RDMA_WRID_SEND_EXTRA_SYNC); > + if (r < 0) { > + fprintf(stderr, > + "rdma migration: qemu_savevm_state_iterate" > + " sync polling error!\n"); > + return -EIO; > + } > + } > +#endif This "sync" thing sounds like something that could be used to send buffered device state on the same channel. But again, I'm quite RDMA-impaired. :) Paolo