* David Gibson (da...@gibson.dropbear.id.au) wrote: > On Wed, Feb 25, 2015 at 04:51:45PM +0000, Dr. David Alan Gilbert (git) wrote: > > From: "Dr. David Alan Gilbert" <dgilb...@redhat.com> > > > > Provide a check to see if the OS we're running on has all the bits > > needed for postcopy. > > > > Creates postcopy-ram.c which will get most of the other helpers we need. > > > > Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > > --- > > include/migration/postcopy-ram.h | 19 +++++ > > migration/Makefile.objs | 2 +- > > migration/postcopy-ram.c | 161 > > +++++++++++++++++++++++++++++++++++++++ > > savevm.c | 5 ++ > > 4 files changed, 186 insertions(+), 1 deletion(-) > > create mode 100644 include/migration/postcopy-ram.h > > create mode 100644 migration/postcopy-ram.c > > > > diff --git a/include/migration/postcopy-ram.h > > b/include/migration/postcopy-ram.h > > new file mode 100644 > > index 0000000..d81934f > > --- /dev/null > > +++ b/include/migration/postcopy-ram.h > > @@ -0,0 +1,19 @@ > > +/* > > + * Postcopy migration for RAM > > + * > > + * Copyright 2013 Red Hat, Inc. and/or its affiliates > > + * > > + * Authors: > > + * Dave Gilbert <dgilb...@redhat.com> > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or > > later. > > + * See the COPYING file in the top-level directory. > > + * > > + */ > > +#ifndef QEMU_POSTCOPY_RAM_H > > +#define QEMU_POSTCOPY_RAM_H > > + > > +/* Return true if the host supports everything we need to do postcopy-ram > > */ > > +bool postcopy_ram_supported_by_host(void); > > + > > +#endif > > diff --git a/migration/Makefile.objs b/migration/Makefile.objs > > index d929e96..0cac6d7 100644 > > --- a/migration/Makefile.objs > > +++ b/migration/Makefile.objs > > @@ -1,7 +1,7 @@ > > common-obj-y += migration.o tcp.o > > common-obj-y += vmstate.o > > common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o > > qemu-file-stdio.o > > -common-obj-y += xbzrle.o > > +common-obj-y += xbzrle.o postcopy-ram.o > > > > common-obj-$(CONFIG_RDMA) += rdma.o > > common-obj-$(CONFIG_POSIX) += exec.o unix.o fd.o > > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > > new file mode 100644 > > index 0000000..a0e20b2 > > --- /dev/null > > +++ b/migration/postcopy-ram.c > > @@ -0,0 +1,161 @@ > > +/* > > + * Postcopy migration for RAM > > + * > > + * Copyright 2013-2014 Red Hat, Inc. and/or its affiliates > > + * > > + * Authors: > > + * Dave Gilbert <dgilb...@redhat.com> > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or > > later. > > + * See the COPYING file in the top-level directory. > > + * > > + */ > > + > > +/* > > + * Postcopy is a migration technique where the execution flips from the > > + * source to the destination before all the data has been copied. > > + */ > > + > > +#include <glib.h> > > +#include <stdio.h> > > +#include <unistd.h> > > + > > +#include "qemu-common.h" > > +#include "migration/migration.h" > > +#include "migration/postcopy-ram.h" > > +#include "sysemu/sysemu.h" > > +#include "qemu/error-report.h" > > +#include "trace.h" > > + > > +/* Postcopy needs to detect accesses to pages that haven't yet been copied > > + * across, and efficiently map new pages in, the techniques for doing this > > + * are target OS specific. > > + */ > > +#if defined(__linux__) > > + > > +#include <sys/mman.h> > > +#include <sys/ioctl.h> > > +#include <sys/types.h> > > +#include <asm/types.h> /* for __u64 */ > > +#include <linux/userfaultfd.h> > > + > > +#ifdef HOST_X86_64 > > +#ifndef __NR_userfaultfd > > +#define __NR_userfaultfd 323 > > Sholdn't this come from the kernel headers imported in the previous > patch? Rather than having an arch-specific hack.
The header, like the rest of the kernel headers, just provides the constant and structure definitions for the call; the syscall numbers come from arch specific headers. I guess in the final world I wouldn't need this at all since it'll come from the system headers; but what's the right way to put this in for new syscalls? > > +#endif > > +#endif > > + > > +#endif > > + > > +#if defined(__linux__) && defined(__NR_userfaultfd) > > + > > +static bool ufd_version_check(int ufd) > > +{ > > + struct uffdio_api api_struct; > > + uint64_t feature_mask; > > + > > + api_struct.api = UFFD_API; > > + if (ioctl(ufd, UFFDIO_API, &api_struct)) { > > + perror("postcopy_ram_supported_by_host: UFFDIO_API failed"); > > This should be error_report() not, perror(), to match qemu > conventions, shouldn't it? Thanks; I've done all of the perrors. > > + return false; > > + } > > + > > + feature_mask = (__u64)1 << _UFFDIO_REGISTER | > > + (__u64)1 << _UFFDIO_UNREGISTER; > > + if ((api_struct.ioctls & feature_mask) != feature_mask) { > > + error_report("Missing userfault features: %" PRIu64, > > + (uint64_t)(~api_struct.ioctls & feature_mask)); > > + return false; > > + } > > + > > + return true; > > +} > > + > > +bool postcopy_ram_supported_by_host(void) > > +{ > > + long pagesize = getpagesize(); > > + int ufd = -1; > > + bool ret = false; /* Error unless we change it */ > > + void *testarea = NULL; > > + struct uffdio_register reg_struct; > > + struct uffdio_range range_struct; > > + uint64_t feature_mask; > > + > > + if ((1ul << qemu_target_page_bits()) > pagesize) { > > + /* The PMI code doesn't yet deal with TPS>HPS */ > > + error_report("Target page size bigger than host page size"); > > + goto out; > > + } > > + > > + ufd = syscall(__NR_userfaultfd, O_CLOEXEC); > > + if (ufd == -1) { > > + perror("postcopy_ram_supported_by_host: userfaultfd not > > available"); > > And here as well? And several places below. > > > + goto out; > > + } > > + > > + /* Version and features check */ > > + if (!ufd_version_check(ufd)) { > > + goto out; > > + } > > + > > + /* > > + * We need to check that the ops we need are supported on anon memory > > + * To do that we need to register a chunk and see the flags that > > + * are returned. > > + */ > > + testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | > > + MAP_ANONYMOUS, -1, 0); > > + if (!testarea) { > > This should be (testarea == MAP_FAILED). Otherwise mmap() failures > will always trip the assert below. Thanks; fixed. > > + perror("postcopy_ram_supported_by_host: Failed to map test area"); > > + goto out; > > + } > > + g_assert(((size_t)testarea & (pagesize-1)) == 0); > > + > > + reg_struct.range.start = (uint64_t)(uintptr_t)testarea; > > + reg_struct.range.len = (uint64_t)pagesize; > > + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; > > + > > + if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) { > > + perror("postcopy_ram_supported_by_host userfault register"); > > + goto out; > > + } > > + > > + range_struct.start = (uint64_t)(uintptr_t)testarea; > > + range_struct.len = (uint64_t)pagesize; > > I don't think you need the (uint64_t) casts (though you do need the > uintptr_t cast). I think the assignment will do an implicit > conversion without probvlems. Yes, you're right - they're gone. > > + if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { > > + perror("postcopy_ram_supported_by_host userfault unregister"); > > + goto out; > > + } > > + > > + feature_mask = (__u64)1 << _UFFDIO_WAKE | > > + (__u64)1 << _UFFDIO_COPY | > > + (__u64)1 << _UFFDIO_ZEROPAGE; > > + if ((reg_struct.ioctls & feature_mask) != feature_mask) { > > + error_report("Missing userfault map features: %" PRIu64, > > I'm guessing you want PRIx64, in order to make the feature mask at > least semi-readable. Yes, thanks - done. > > + (uint64_t)(~reg_struct.ioctls & feature_mask)); > > + goto out; > > + } > > + > > + /* Success! */ > > + ret = true; > > +out: > > + if (testarea) { > > + munmap(testarea, pagesize); > > + } > > + if (ufd != -1) { > > + close(ufd); > > + } > > + return ret; > > +} > > + > > +#else > > +/* No target OS support, stubs just fail */ > > + > > +bool postcopy_ram_supported_by_host(void) > > +{ > > + error_report("%s: No OS support", __func__); > > + return false; > > +} > > + > > +#endif > > + > > diff --git a/savevm.c b/savevm.c > > index e301a0a..2ea4c76 100644 > > --- a/savevm.c > > +++ b/savevm.c > > @@ -33,6 +33,7 @@ > > #include "qemu/timer.h" > > #include "audio/audio.h" > > #include "migration/migration.h" > > +#include "migration/postcopy-ram.h" > > #include "qemu/sockets.h" > > #include "qemu/queue.h" > > #include "sysemu/cpus.h" > > @@ -1109,6 +1110,10 @@ static int > > loadvm_postcopy_handle_advise(MigrationIncomingState *mis, > > return -1; > > } > > > > + if (!postcopy_ram_supported_by_host()) { > > + return -1; > > + } > > + > > if (remote_hps != getpagesize()) { > > /* > > * Some combinations of mismatch are probably possible but it gets > > -- > David Gibson | I'll have my music baroque, and my code > david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ > _other_ > | _way_ _around_! > http://www.ozlabs.org/~dgibson Dave -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK