Il 11/03/2013 05:33, michael.r.hines.mrhi...@linux.vnet.ibm.com ha scritto:
From: "Michael R. Hines" <mrhi...@us.ibm.com>
For performance reasons, dup_page() and xbzrle() are skipped because
they are too expensive for zero-copy RDMA.
Signed-off-by: Michael R. Hines <mrhi...@us.ibm.com>
---
arch_init.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 56 insertions(+), 1 deletion(-)
diff --git a/arch_init.c b/arch_init.c
index 8daeafa..437cb47 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -45,6 +45,7 @@
#include "exec/address-spaces.h"
#include "hw/pcspk.h"
#include "migration/page_cache.h"
+#include "migration/rdma.h"
#include "qemu/config-file.h"
#include "qmp-commands.h"
#include "trace.h"
@@ -245,6 +246,18 @@ uint64_t norm_mig_pages_transferred(void)
return acct_info.norm_pages;
}
+/*
+ * RDMA does not use the buffered_file,
+ * but we still need a way to do accounting...
+ */
+uint64_t delta_norm_mig_bytes_transferred(void)
+{
+ static uint64_t last_norm_pages = 0;
+ uint64_t delta_bytes = (acct_info.norm_pages - last_norm_pages) *
TARGET_PAGE_SIZE;
+ last_norm_pages = acct_info.norm_pages;
+ return delta_bytes;
+}
+
uint64_t xbzrle_mig_bytes_transferred(void)
{
return acct_info.xbzrle_bytes;
@@ -282,6 +295,45 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block,
ram_addr_t offset,
return size;
}
+static size_t save_rdma_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+ int cont)
+{
+ int ret;
+ size_t bytes_sent = 0;
+ ram_addr_t current_addr;
+ RDMAData * rdma = &migrate_get_current()->rdma;
+
+ acct_info.norm_pages++;
+
+ /*
+ * use RDMA to send page
+ */
Not quite true, the page is added to the current chunk. Please make the
comments a quick-and-dirty reference of the protocol, or leave them out
altogether.
+ current_addr = block->offset + offset;
+ if ((ret = qemu_rdma_write(rdma, current_addr, TARGET_PAGE_SIZE)) < 0) {
+ fprintf(stderr, "rdma migration: write error! %d\n", ret);
+ qemu_file_set_error(f, ret);
+ return ret;
+ }
+
+ /*
+ * do some polling
+ */
Again, that's quite self-evident. Poll for what though? :)
+ while (1) {
+ int ret = qemu_rdma_poll(rdma);
+ if (ret == RDMA_WRID_NONE) {
+ break;
+ }
+ if (ret < 0) {
+ fprintf(stderr, "rdma migration: polling error! %d\n", ret);
+ qemu_file_set_error(f, ret);
+ return ret;
+ }
+ }
+
+ bytes_sent += TARGET_PAGE_SIZE;
+ return bytes_sent;
+}
As written in the other message, I think this should be an additional
QEMUFile operation, hopefully the same that Orit is introducing in her
patches.
#define ENCODING_FLAG_XBZRLE 0x1
static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -462,7 +514,10 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
/* In doubt sent page as normal */
bytes_sent = -1;
- if (is_dup_page(p)) {
+ if (migrate_use_rdma()) {
+ /* searching for zeros is still too expensive for RDMA */
+ bytes_sent = save_rdma_page(f, block, offset, cont);
Again as written in the other message, this is not really an RDMA thing,
it's mostly the effect of a fast link. Of course to some extent it
depends on the CPU and RAM speed, but we can pretend that it doesn't.
+ } else if (is_dup_page(p)) {
acct_info.dup_pages++;
bytes_sent = save_block_hdr(f, block, offset, cont,
RAM_SAVE_FLAG_COMPRESS);
Thanks,
Paolo