* Peter Xu (pet...@redhat.com) wrote: > On Thu, Oct 08, 2020 at 05:18:14PM +0100, Peter Maydell wrote: > > Compile failure, windows crossbuilds: > > > > ../../migration/migration.c: In function 'page_request_addr_cmp': > > ../../migration/migration.c:148:23: error: cast from pointer to > > integer of different size [-Werror=pointer-to-int-cast] > > unsigned long a = (unsigned long) ap, b = (unsigned long) bp; > > ^ > > ../../migration/migration.c:148:47: error: cast from pointer to > > integer of different size [-Werror=pointer-to-int-cast] > > unsigned long a = (unsigned long) ap, b = (unsigned long) bp; > > ^ > > Below squashed into "migration: Maintain postcopy faulted addresses" should > fix > the problem: > > ------8<------ > diff --git a/migration/migration.c b/migration/migration.c > index e7d179bffc..d8a5c0de44 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -145,7 +145,7 @@ static void migrate_fd_cancel(MigrationState *s); > > static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) > { > - unsigned long a = (unsigned long) ap, b = (unsigned long) bp; > + uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp; > > return (a > b) - (a < b); > } > ------8<------ > > A whole replacement patch attached too. > > Thanks! >
> -- > Peter Xu > From 6d5708092466abb2ed683f9ef24021e2edf25872 Mon Sep 17 00:00:00 2001 > From: Peter Xu <pet...@redhat.com> > Date: Fri, 28 Aug 2020 18:02:14 -0400 > Subject: [PATCH] migration: Maintain postcopy faulted addresses > > Maintain a list of faulted addresses on the destination host for which we're > waiting on. This is implemented using a GTree rather than a real list to make > sure even there're plenty of vCPUs/threads that are faulting, the lookup will > still be fast with O(log(N)) (because we'll do that after placing each page). > It should bring a slight overhead, but ideally that shouldn't be a big problem > simply because in most cases the requested page list will be short. > > Actually we did similar things for postcopy blocktime measurements. This > patch > didn't use that simply because: > > (1) blocktime measurement is towards vcpu threads only, but here we need to > record all faulted addresses, including main thread and external > thread (like, DPDK via vhost-user). > > (2) blocktime measurement will require UFFD_FEATURE_THREAD_ID, but here we > don't want to add that extra dependency on the kernel version since not > necessary. E.g., we don't need to know which thread faulted on which > page, we also don't care about multiple threads faulting on the same > page. But we only care about what addresses are faulted so waiting for > a > page copying from src. > > (3) blocktime measurement is not enabled by default. However we need this > by > default especially for postcopy recover. > > Another thing to mention is that this patch introduced a new mutex to > serialize > the receivedmap and the page_requested tree, however that serialization does > not cover other procedures like UFFDIO_COPY. > > Signed-off-by: Peter Xu <pet...@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> Respin with that replacing the original coming up. Dave > --- > migration/migration.c | 41 +++++++++++++++++++++++++++++++++++++++- > migration/migration.h | 19 ++++++++++++++++++- > migration/postcopy-ram.c | 17 ++++++++++++++--- > migration/trace-events | 2 ++ > 4 files changed, 74 insertions(+), 5 deletions(-) > > diff --git a/migration/migration.c b/migration/migration.c > index b2dac6b39c..d8a5c0de44 100644 > --- a/migration/migration.c > +++ b/migration/migration.c > @@ -143,6 +143,13 @@ static int migration_maybe_pause(MigrationState *s, > int new_state); > static void migrate_fd_cancel(MigrationState *s); > > +static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) > +{ > + uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp; > + > + return (a > b) - (a < b); > +} > + > void migration_object_init(void) > { > MachineState *ms = MACHINE(qdev_get_machine()); > @@ -165,6 +172,8 @@ void migration_object_init(void) > qemu_event_init(¤t_incoming->main_thread_load_event, false); > qemu_sem_init(¤t_incoming->postcopy_pause_sem_dst, 0); > qemu_sem_init(¤t_incoming->postcopy_pause_sem_fault, 0); > + qemu_mutex_init(¤t_incoming->page_request_mutex); > + current_incoming->page_requested = g_tree_new(page_request_addr_cmp); > > if (!migration_object_check(current_migration, &err)) { > error_report_err(err); > @@ -240,6 +249,11 @@ void migration_incoming_state_destroy(void) > > qemu_event_reset(&mis->main_thread_load_event); > > + if (mis->page_requested) { > + g_tree_destroy(mis->page_requested); > + mis->page_requested = NULL; > + } > + > if (mis->socket_address_list) { > qapi_free_SocketAddressList(mis->socket_address_list); > mis->socket_address_list = NULL; > @@ -354,8 +368,33 @@ int > migrate_send_rp_message_req_pages(MigrationIncomingState *mis, > } > > int migrate_send_rp_req_pages(MigrationIncomingState *mis, > - RAMBlock *rb, ram_addr_t start) > + RAMBlock *rb, ram_addr_t start, uint64_t haddr) > { > + void *aligned = (void *)(uintptr_t)(haddr & (-qemu_target_page_size())); > + bool received; > + > + WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) { > + received = ramblock_recv_bitmap_test_byte_offset(rb, start); > + if (!received && !g_tree_lookup(mis->page_requested, aligned)) { > + /* > + * The page has not been received, and it's not yet in the page > + * request list. Queue it. Set the value of element to 1, so > that > + * things like g_tree_lookup() will return TRUE (1) when found. > + */ > + g_tree_insert(mis->page_requested, aligned, (gpointer)1); > + mis->page_requested_count++; > + trace_postcopy_page_req_add(aligned, mis->page_requested_count); > + } > + } > + > + /* > + * If the page is there, skip sending the message. We don't even need > the > + * lock because as long as the page arrived, it'll be there forever. > + */ > + if (received) { > + return 0; > + } > + > return migrate_send_rp_message_req_pages(mis, rb, start); > } > > diff --git a/migration/migration.h b/migration/migration.h > index e853ccf8b1..8d2d1ce839 100644 > --- a/migration/migration.h > +++ b/migration/migration.h > @@ -104,6 +104,23 @@ struct MigrationIncomingState { > > /* List of listening socket addresses */ > SocketAddressList *socket_address_list; > + > + /* A tree of pages that we requested to the source VM */ > + GTree *page_requested; > + /* For debugging purpose only, but would be nice to keep */ > + int page_requested_count; > + /* > + * The mutex helps to maintain the requested pages that we sent to the > + * source, IOW, to guarantee coherent between the page_requests tree and > + * the per-ramblock receivedmap. Note! This does not guarantee > consistency > + * of the real page copy procedures (using UFFDIO_[ZERO]COPY). E.g., > even > + * if one bit in receivedmap is cleared, UFFDIO_COPY could have happened > + * for that page already. This is intended so that the mutex won't > + * serialize and blocked by slow operations like UFFDIO_* ioctls. > However > + * this should be enough to make sure the page_requested tree always > + * contains valid information. > + */ > + QemuMutex page_request_mutex; > }; > > MigrationIncomingState *migration_incoming_get_current(void); > @@ -332,7 +349,7 @@ void migrate_send_rp_shut(MigrationIncomingState *mis, > void migrate_send_rp_pong(MigrationIncomingState *mis, > uint32_t value); > int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb, > - ram_addr_t start); > + ram_addr_t start, uint64_t haddr); > int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, > RAMBlock *rb, ram_addr_t start); > void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > index 722034dc01..ca1daf0024 100644 > --- a/migration/postcopy-ram.c > +++ b/migration/postcopy-ram.c > @@ -684,7 +684,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, > RAMBlock *rb, > qemu_ram_get_idstr(rb), rb_offset); > return postcopy_wake_shared(pcfd, client_addr, rb); > } > - migrate_send_rp_req_pages(mis, rb, aligned_rbo); > + migrate_send_rp_req_pages(mis, rb, aligned_rbo, client_addr); > return 0; > } > > @@ -979,7 +979,8 @@ retry: > * Send the request to the source - we want to request one > * of our host page sizes (which is >= TPS) > */ > - ret = migrate_send_rp_req_pages(mis, rb, rb_offset); > + ret = migrate_send_rp_req_pages(mis, rb, rb_offset, > + msg.arg.pagefault.address); > if (ret) { > /* May be network failure, try to wait for recovery */ > if (ret == -EIO && postcopy_pause_fault_thread(mis)) { > @@ -1149,10 +1150,20 @@ static int qemu_ufd_copy_ioctl(MigrationIncomingState > *mis, void *host_addr, > ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct); > } > if (!ret) { > + qemu_mutex_lock(&mis->page_request_mutex); > ramblock_recv_bitmap_set_range(rb, host_addr, > pagesize / qemu_target_page_size()); > + /* > + * If this page resolves a page fault for a previous recorded faulted > + * address, take a special note to maintain the requested page list. > + */ > + if (g_tree_lookup(mis->page_requested, host_addr)) { > + g_tree_remove(mis->page_requested, host_addr); > + mis->page_requested_count--; > + trace_postcopy_page_req_del(host_addr, > mis->page_requested_count); > + } > + qemu_mutex_unlock(&mis->page_request_mutex); > mark_postcopy_blocktime_end((uintptr_t)host_addr); > - > } > return ret; > } > diff --git a/migration/trace-events b/migration/trace-events > index 338f38b3dd..e4d5eb94ca 100644 > --- a/migration/trace-events > +++ b/migration/trace-events > @@ -162,6 +162,7 @@ postcopy_pause_return_path(void) "" > postcopy_pause_return_path_continued(void) "" > postcopy_pause_continued(void) "" > postcopy_start_set_run(void) "" > +postcopy_page_req_add(void *addr, int count) "new page req %p total %d" > source_return_path_thread_bad_end(void) "" > source_return_path_thread_end(void) "" > source_return_path_thread_entry(void) "" > @@ -272,6 +273,7 @@ postcopy_ram_incoming_cleanup_blocktime(uint64_t total) > "total blocktime %" PRIu > postcopy_request_shared_page(const char *sharer, const char *rb, uint64_t > rb_offset) "for %s in %s offset 0x%"PRIx64 > postcopy_request_shared_page_present(const char *sharer, const char *rb, > uint64_t rb_offset) "%s already %s offset 0x%"PRIx64 > postcopy_wake_shared(uint64_t client_addr, const char *rb) "at 0x%"PRIx64" > in %s" > +postcopy_page_req_del(void *addr, int count) "resolved page req %p total %d" > > get_mem_fault_cpu_index(int cpu, uint32_t pid) "cpu: %d, pid: %u" > > -- > 2.26.2 > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK