* Andrey Gruzdev (andrey.gruz...@virtuozzo.com) wrote: > On 25.11.2020 16:08, Dr. David Alan Gilbert wrote: > > * Andrey Gruzdev (andrey.gruz...@virtuozzo.com) wrote: > > > In this particular implementation the same single migration > > > thread is responsible for both normal linear dirty page > > > migration and procesing UFFD page fault events. > > > > > > Processing write faults includes reading UFFD file descriptor, > > > finding respective RAM block and saving faulting page to > > > the migration stream. After page has been saved, write protection > > > can be removed. Since asynchronous version of qemu_put_buffer() > > > is expected to be used to save pages, we also have to flush > > > migraion stream prior to un-protecting saved memory range. > > > > > > Write protection is being removed for any previously protected > > > memory chunk that has hit the migration stream. That's valid > > > for pages from linear page scan along with write fault pages. > > > > > > Signed-off-by: Andrey Gruzdev <andrey.gruz...@virtuozzo.com> > > > --- > > > migration/ram.c | 124 ++++++++++++++++++++++++++++++++++++++++++++---- > > > 1 file changed, 115 insertions(+), 9 deletions(-) > > > > > > diff --git a/migration/ram.c b/migration/ram.c > > > index 7f273c9996..08a1d7a252 100644 > > > --- a/migration/ram.c > > > +++ b/migration/ram.c > > > @@ -314,6 +314,8 @@ struct RAMState { > > > ram_addr_t last_page; > > > /* last ram version we have seen */ > > > uint32_t last_version; > > > + /* 'write-tracking' migration is enabled */ > > > + bool ram_wt_enabled; > > > /* We are in the first round */ > > > bool ram_bulk_stage; > > > /* The free page optimization is enabled */ > > > @@ -574,8 +576,6 @@ static int uffd_protect_memory(int uffd, hwaddr > > > start, hwaddr length, bool wp) > > > return 0; > > > } > > > -__attribute__ ((unused)) > > > -static int uffd_read_events(int uffd, struct uffd_msg *msgs, int count); > > > __attribute__ ((unused)) > > > static bool uffd_poll_events(int uffd, int tmo); > > > @@ -1929,6 +1929,86 @@ static int ram_save_host_page(RAMState *rs, > > > PageSearchStatus *pss, > > > return pages; > > > } > > > +/** > > > + * ram_find_block_by_host_address: find RAM block containing host page > > > + * > > > + * Returns true if RAM block is found and pss->block/page are > > > + * pointing to the given host page, false in case of an error > > > + * > > > + * @rs: current RAM state > > > + * @pss: page-search-status structure > > > + */ > > > +static bool ram_find_block_by_host_address(RAMState *rs, > > > PageSearchStatus *pss, > > > + hwaddr page_address) > > > +{ > > > + bool found = false; > > > + > > > + pss->block = rs->last_seen_block; > > > + do { > > > + if (page_address >= (hwaddr) pss->block->host && > > > + (page_address + TARGET_PAGE_SIZE) <= > > > + ((hwaddr) pss->block->host + > > > pss->block->used_length)) { > > > + pss->page = (unsigned long) > > > + ((page_address - (hwaddr) pss->block->host) >> > > > TARGET_PAGE_BITS); > > > + found = true; > > > + break; > > > + } > > > + > > > + pss->block = QLIST_NEXT_RCU(pss->block, next); > > > + if (!pss->block) { > > > + /* Hit the end of the list */ > > > + pss->block = QLIST_FIRST_RCU(&ram_list.blocks); > > > + } > > > + } while (pss->block != rs->last_seen_block); > > > + > > > + rs->last_seen_block = pss->block; > > > + /* > > > + * Since we are in the same loop with ram_find_and_save_block(), > > > + * need to reset pss->complete_round after switching to > > > + * other block/page in pss. > > > + */ > > > + pss->complete_round = false; > > > + > > > + return found; > > > +} > > > + > > > +/** > > > + * get_fault_page: try to get next UFFD write fault page and, if pending > > > fault > > > + * is found, put info about RAM block and block page into pss structure > > > + * > > > + * Returns true in case of UFFD write fault detected, false otherwise > > > + * > > > + * @rs: current RAM state > > > + * @pss: page-search-status structure > > > + * > > > + */ > > > +static bool get_fault_page(RAMState *rs, PageSearchStatus *pss) > > > +{ > > > + struct uffd_msg uffd_msg; > > > + hwaddr page_address; > > > + int res; > > > + > > > + if (!rs->ram_wt_enabled) { > > > + return false; > > > + } > > > + > > > + res = uffd_read_events(rs->uffdio_fd, &uffd_msg, 1); > > > + if (res <= 0) { > > > + return false; > > > + } > > > + > > > + page_address = uffd_msg.arg.pagefault.address; > > > + if (!ram_find_block_by_host_address(rs, pss, page_address)) { > > > + /* In case we couldn't find respective block, just unprotect > > > faulting page */ > > > + uffd_protect_memory(rs->uffdio_fd, page_address, > > > TARGET_PAGE_SIZE, false); > > > + error_report("ram_find_block_by_host_address() failed: > > > address=0x%0lx", > > > + page_address); > > > + return false; > > > + } > > > + > > > + return true; > > > +} > > > + > > > /** > > > * ram_find_and_save_block: finds a dirty page and sends it to f > > > * > > > @@ -1955,25 +2035,50 @@ static int ram_find_and_save_block(RAMState *rs, > > > bool last_stage) > > > return pages; > > > } > > > + if (!rs->last_seen_block) { > > > + rs->last_seen_block = QLIST_FIRST_RCU(&ram_list.blocks); > > > + } > > > pss.block = rs->last_seen_block; > > > pss.page = rs->last_page; > > > pss.complete_round = false; > > > - if (!pss.block) { > > > - pss.block = QLIST_FIRST_RCU(&ram_list.blocks); > > > - } > > > - > > > do { > > > + ram_addr_t page; > > > + ram_addr_t page_to; > > > + > > > again = true; > > > - found = get_queued_page(rs, &pss); > > > - > > > + /* In case of 'write-tracking' migration we first try > > > + * to poll UFFD and get write page fault event */ > > > + found = get_fault_page(rs, &pss); > > > + if (!found) { > > > + /* No trying to fetch something from the priority queue */ > > > + found = get_queued_page(rs, &pss); > > > + } > > > if (!found) { > > > /* priority queue empty, so just search for something dirty > > > */ > > > found = find_dirty_block(rs, &pss, &again); > > > } > > > if (found) { > > > + page = pss.page; > > > pages = ram_save_host_page(rs, &pss, last_stage); > > > + page_to = pss.page; > > > + > > > + /* Check if page is from UFFD-managed region */ > > > + if (pss.block->flags & RAM_UF_WRITEPROTECT) { > > > + hwaddr page_address = (hwaddr) pss.block->host + > > > + ((hwaddr) page << TARGET_PAGE_BITS); > > > + hwaddr run_length = (hwaddr) (page_to - page + 1) << > > > TARGET_PAGE_BITS; > > > + int res; > > > + > > > + /* Flush async buffers before un-protect */ > > > + qemu_fflush(rs->f); > > > + /* Un-protect memory range */ > > > + res = uffd_protect_memory(rs->uffdio_fd, page_address, > > > run_length, false); > > > + if (res < 0) { > > > + break; > > > + } > > > + } > > > > Please separate that out into a separate function. > > > > Dave > > > > You mean separate implementation of ram_find_and_save_block()?
No, I mean take that if (...WRITEPROTECT) { ..} block and put it in a function and call it from there, just to keep ram_find_and_save_block clean. Dave > Andrey > > > > } > > > } while (!pages && again); > > > @@ -2086,7 +2191,8 @@ static void ram_state_reset(RAMState *rs) > > > rs->last_sent_block = NULL; > > > rs->last_page = 0; > > > rs->last_version = ram_list.version; > > > - rs->ram_bulk_stage = true; > > > + rs->ram_wt_enabled = migrate_track_writes_ram(); > > > + rs->ram_bulk_stage = !rs->ram_wt_enabled; > > > rs->fpo_enabled = false; > > > } > > > -- > > > 2.25.1 > > > > > > -- > Andrey Gruzdev, Principal Engineer > Virtuozzo GmbH +7-903-247-6397 > virtuzzo.com > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK