On Fri, Oct 02, 2015 at 10:48:13AM +1000, Richard Henderson wrote: > On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote: > >Anonymous and file-backed RAM allocation are now almost exactly the same. > > > >Reduce code duplication by moving RAM mmap code out of oslib-posix.c and > >exec.c. > > > >Reported-by: Marc-André Lureau <mlur...@redhat.com> > >Signed-off-by: Michael S. Tsirkin <m...@redhat.com> > >Reviewed-by: Paolo Bonzini <pbonz...@redhat.com> > >Acked-by: Paolo Bonzini <pbonz...@redhat.com> > >--- > > > >Changes from v1: add shared flag to get MAP_SHARED mappings > >(for vhost-user), only set MAP_ANONYMOUS for anonymous RAM. > > > > include/qemu/mmap-alloc.h | 10 +++++++ > > exec.c | 47 +++++++------------------------ > > util/mmap-alloc.c | 71 > > +++++++++++++++++++++++++++++++++++++++++++++++ > > util/oslib-posix.c | 28 +++---------------- > > util/Makefile.objs | 2 +- > > 5 files changed, 96 insertions(+), 62 deletions(-) > > create mode 100644 include/qemu/mmap-alloc.h > > create mode 100644 util/mmap-alloc.c > > > >diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h > >new file mode 100644 > >index 0000000..56388e6 > >--- /dev/null > >+++ b/include/qemu/mmap-alloc.h > >@@ -0,0 +1,10 @@ > >+#ifndef QEMU_MMAP_ALLOC > >+#define QEMU_MMAP_ALLOC > >+ > >+#include "qemu-common.h" > >+ > >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); > >+ > >+void qemu_ram_munmap(void *ptr, size_t size); > >+ > >+#endif > >diff --git a/exec.c b/exec.c > >index 7d90a52..4505dc7 100644 > >--- a/exec.c > >+++ b/exec.c > >@@ -55,6 +55,9 @@ > > #include "exec/ram_addr.h" > > > > #include "qemu/range.h" > >+#ifndef _WIN32 > >+#include "qemu/mmap-alloc.h" > >+#endif > > > > //#define DEBUG_SUBPAGE > > > >@@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned; > > */ > > #define RAM_RESIZEABLE (1 << 2) > > > >-/* An extra page is mapped on top of this RAM. > >+/* RAM is backed by an mmapped file. > > */ > >-#define RAM_EXTRA (1 << 3) > >+#define RAM_FILE (1 << 3) > > #endif > > > > struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); > >@@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block, > > char *filename; > > char *sanitized_name; > > char *c; > >- void *ptr; > >- void *area = NULL; > >+ void *area; > > int fd; > > uint64_t hpagesize; > >- uint64_t total; > > Error *local_err = NULL; > >- size_t offset; > > > > hpagesize = gethugepagesize(path, &local_err); > > if (local_err) { > >@@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block, > > g_free(filename); > > > > memory = ROUND_UP(memory, hpagesize); > >- total = memory + hpagesize; > > > > /* > > * ftruncate is not supported by hugetlbfs in older > >@@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block, > > perror("ftruncate"); > > } > > > >- ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, > >- -1, 0); > >- if (ptr == MAP_FAILED) { > >- error_setg_errno(errp, errno, > >- "unable to allocate memory range for hugepages"); > >- close(fd); > >- goto error; > >- } > >- > >- offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr; > >- > >- area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE, > >- (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) | > >- MAP_FIXED, > >- fd, 0); > >+ area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED); > > if (area == MAP_FAILED) { > > error_setg_errno(errp, errno, > > "unable to map backing store for hugepages"); > >- munmap(ptr, total); > > close(fd); > > goto error; > > } > > > >- if (offset > 0) { > >- munmap(ptr, offset); > >- } > >- ptr += offset; > >- total -= offset; > >- > >- if (total > memory + getpagesize()) { > >- munmap(ptr + memory + getpagesize(), > >- total - memory - getpagesize()); > >- } > >- > > if (mem_prealloc) { > > os_mem_prealloc(fd, area, memory); > > } > >@@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, > >MemoryRegion *mr, > > new_block->used_length = size; > > new_block->max_length = size; > > new_block->flags = share ? RAM_SHARED : 0; > >- new_block->flags |= RAM_EXTRA; > >+ new_block->flags |= RAM_FILE; > > new_block->host = file_ram_alloc(new_block, size, > > mem_path, errp); > > if (!new_block->host) { > >@@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block) > > xen_invalidate_map_cache_entry(block->host); > > #ifndef _WIN32 > > } else if (block->fd >= 0) { > >- if (block->flags & RAM_EXTRA) { > >- munmap(block->host, block->max_length + getpagesize()); > >+ if (block->flags & RAM_FILE) { > >+ qemu_ram_munmap(block->host, block->max_length); > > } else { > > munmap(block->host, block->max_length); > > } > >diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c > >new file mode 100644 > >index 0000000..e82cc94 > >--- /dev/null > >+++ b/util/mmap-alloc.c > >@@ -0,0 +1,71 @@ > >+/* > >+ * Support for RAM backed by mmaped host memory. > >+ * > >+ * Copyright (c) 2015 Red Hat, Inc. > >+ * > >+ * Authors: > >+ * Michael S. Tsirkin <m...@redhat.com> > >+ * > >+ * This work is licensed under the terms of the GNU GPL, version 2 or > >+ * later. See the COPYING file in the top-level directory. > >+ */ > >+#include <qemu/mmap-alloc.h> > >+#include <sys/types.h> > >+#include <sys/mman.h> > >+#include <assert.h> > >+ > >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) > > Do we need generic "align", or would a bool hugepage suffice? Or I suppose > this is also used for non-anonymous hugepages via /dev/hugepage where we > have multiple huge page sizes?
Right. Further, passing in alignment makes it possible to keep page size probing in exec.c which is the only user. > Should we call this function something else, so that we can use it to > allocate hugepage aligned storage for code_gen_buffer and/or the tbs array? I don't much care what do we call it - with two callers it would be easy to rename down the road. The reason I put in "RAM" is because it allocates a guard page which only seems to make sense for RAM memory that can be mapped. But if you have a better name in mind, please let me know. > >+{ > >+ /* > >+ * Note: this always allocates at least one extra page of virtual > >address > >+ * space, even if size is already aligned. > >+ */ > >+ size_t total = size + align; > >+ void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, > >0); > >+ size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; > >+ void *ptr1; > >+ > >+ if (ptr == MAP_FAILED) { > >+ return NULL; > >+ } > >+ > >+ /* Make sure align is a power of 2 */ > >+ assert(!(align & (align - 1))); > >+ /* Always align to host page size */ > >+ assert(align >= getpagesize()); > > Is qemu_real_host_page_size initialized yet? > If not, should we move the initialization forward? I think it is. Though qemu_real_host_page_size seems to only be needed for portability, and this file is posix-specific. Anyway, we are moving code from util/oslib-posix.c and exec.c that currently both call getpagesize in this function, it seems better to be consistent. We can refactor afterwards, correct? > >+ > >+ ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, > >+ MAP_FIXED | > >+ (fd == -1 ? MAP_ANONYMOUS : 0) | > >+ (shared ? MAP_SHARED : MAP_PRIVATE), > >+ fd, 0); > > If fd == -1, we really only need mprotect on the region, correct? Yes. But that's already like this in util/oslib-posix.c, this is just re-factoring without logic changes. Any reason to prefer mprotect? That would be more LOC. > >+ if (ptr1 == MAP_FAILED) { > >+ munmap(ptr, total); > >+ return NULL; > >+ } > >+ > >+ ptr += offset; > >+ total -= offset; > >+ > >+ if (offset > 0) { > >+ munmap(ptr - offset, offset); > >+ } > >+ > >+ /* > >+ * Leave a single PROT_NONE page allocated after the RAM block, to > >serve as > >+ * a guard page guarding against potential buffer overflows. > >+ */ > >+ if (total > size + getpagesize()) { > >+ munmap(ptr + size + getpagesize(), total - size - getpagesize()); > >+ } > >+ > >+ return ptr; > >+}