RDMA migration is very hard to complete when the VM runs a MySQL benchmark on a host using 1G hugepages. I think the time between ram_control_before_iterate(f, RAM_CONTROL_ROUND) and after_iterate is too large with a 1G host page size, so a 1M buffer size, which matches the mlx driver, works well. After this patch, migration works normally in my situation.
Signed-off-by: Zhiwei Jiang <elish.ji...@ucloud.cn> --- migration/migration.c | 13 +++++++++++++ migration/migration.h | 6 ++++++ migration/ram.c | 6 +++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 041b8451a6..934916b161 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -457,6 +457,8 @@ void migrate_add_address(SocketAddress *address) static void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p = NULL; + MigrationState *s = migrate_get_current(); + s->enabled_rdma_migration = false; qapi_event_send_migration(MIGRATION_STATUS_SETUP); if (strstart(uri, "tcp:", &p) || @@ -465,6 +467,7 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) socket_start_incoming_migration(p ? p : uri, errp); #ifdef CONFIG_RDMA } else if (strstart(uri, "rdma:", &p)) { + s->enabled_rdma_migration = true; rdma_start_incoming_migration(p, errp); #endif } else if (strstart(uri, "exec:", &p)) { @@ -2040,6 +2043,7 @@ void migrate_init(MigrationState *s) s->start_postcopy = false; s->postcopy_after_devices = false; s->migration_thread_running = false; + s->enabled_rdma_migration = false; error_free(s->error); s->error = NULL; s->hostname = NULL; @@ -2300,6 +2304,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, socket_start_outgoing_migration(s, p ? 
p : uri, &local_err); #ifdef CONFIG_RDMA } else if (strstart(uri, "rdma:", &p)) { + s->enabled_rdma_migration = true; rdma_start_outgoing_migration(s, p, &local_err); #endif } else if (strstart(uri, "exec:", &p)) { @@ -2475,6 +2480,14 @@ bool migrate_use_events(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; } +bool migrate_use_rdma(void) +{ + MigrationState *s; + s = migrate_get_current(); + + return s->enabled_rdma_migration; +} + bool migrate_use_multifd(void) { MigrationState *s; diff --git a/migration/migration.h b/migration/migration.h index 7a5aa8c2fd..860dc93df1 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -296,6 +296,11 @@ struct MigrationState { * This save hostname when out-going migration starts */ char *hostname; + + /* + * Enable RDMA migration + */ + bool enabled_rdma_migration; }; void migrate_set_state(int *state, int old_state, int new_state); @@ -332,6 +337,7 @@ bool migrate_ignore_shared(void); bool migrate_validate_uuid(void); bool migrate_auto_converge(void); +bool migrate_use_rdma(void); bool migrate_use_multifd(void); bool migrate_pause_before_switchover(void); int migrate_multifd_channels(void); diff --git a/migration/ram.c b/migration/ram.c index 7a43bfd7af..dc0c0e2565 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2043,7 +2043,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; unsigned long hostpage_boundary = QEMU_ALIGN_UP(pss->page + 1, pagesize_bits); + /* Set RDMA boundary default 256*4K=1M that driver delivery more effective*/ + unsigned long rdma_boundary = + QEMU_ALIGN_UP(pss->page + 1, 256); unsigned long start_page = pss->page; + bool use_rdma = migrate_use_rdma(); int res; if (ramblock_is_ignored(pss->block)) { @@ -2069,7 +2073,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, } } pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); - } while ((pss->page < 
hostpage_boundary) && + } while ((pss->page < (use_rdma ? rdma_boundary : hostpage_boundary)) && offset_in_ramblock(pss->block, ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)); /* The offset we leave with is the min boundary of host page and block */ -- 2.25.1