On 01.11.24 14:47, Steve Sistare wrote:
Allocate anonymous memory using mmap MAP_ANON or memfd_create depending
on the value of the anon-alloc machine property. This option applies to
memory allocated as a side effect of creating various devices. It does
not apply to memory-backend-objects, whether explicitly specified on
the command line, or implicitly created by the -m command line option.
The memfd option is intended to support new migration modes, in which the
memory region can be transferred in place to a new QEMU process, by sending
the memfd file descriptor to the process. Memory contents are preserved,
and if the mode also transfers device descriptors, then pages that are
locked in memory for DMA remain locked. This behavior is a pre-requisite
for supporting vfio, vdpa, and iommufd devices with the new modes.
A more portable, non-Linux specific variant of this would be to use shm,
similar to backends/hostmem-shm.c.
Likely we should be using that instead of memfd, or try hiding the
details. See below.
[...]
@@ -69,6 +70,8 @@
#include "qemu/pmem.h"
+#include "qapi/qapi-types-migration.h"
+#include "migration/options.h"
#include "migration/vmstate.h"
#include "qemu/range.h"
@@ -1849,6 +1852,35 @@ static void ram_block_add(RAMBlock *new_block, Error
**errp)
qemu_mutex_unlock_ramlist();
return;
}
+
+ } else if (current_machine->anon_alloc == ANON_ALLOC_OPTION_MEMFD &&
+ !object_dynamic_cast(new_block->mr->parent_obj.parent,
+ TYPE_MEMORY_BACKEND)) {
This looks a bit hackish, and I don't think ram_block_add() is the right
place for this. It should likely happen in the caller.
We already do have two ways of allocating "shared anonymous memory":
(1) memory-backend-ram,share=on
(2) memory-backend-shm
(2) gives us an fd as it uses shm_open(), (1) doesn't give us an fd as it
uses MAP_ANON|MAP_SHARED. (1) is really only a corner case use case [1].
[there is also Linux specific memfd, which gives us more flexibility with
hugetlb etc, but for the purpose here shm should likely be sufficient?]
So why not make (1) behave like (2) and move that handling into
qemu_ram_alloc_internal(), from where we can easily enable it using a
new RMA_SHARED flag? So as a first step, something like:
From 4b7b760c6e54cf05addca6728edc19adbec1588a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <da...@redhat.com>
Date: Mon, 4 Nov 2024 11:29:22 +0100
Subject: [PATCH] tmp
Signed-off-by: David Hildenbrand <da...@redhat.com>
---
backends/hostmem-shm.c | 56 ++++----------------------------
system/physmem.c | 73 ++++++++++++++++++++++++++++++++++++++++--
2 files changed, 76 insertions(+), 53 deletions(-)
diff --git a/backends/hostmem-shm.c b/backends/hostmem-shm.c
index 374edc3db8..0f33b35e9c 100644
--- a/backends/hostmem-shm.c
+++ b/backends/hostmem-shm.c
@@ -25,11 +25,8 @@ struct HostMemoryBackendShm {
static bool
shm_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
{
- g_autoptr(GString) shm_name = g_string_new(NULL);
- g_autofree char *backend_name = NULL;
+ g_autofree char *name = NULL;
uint32_t ram_flags;
- int fd, oflag;
- mode_t mode;
if (!backend->size) {
error_setg(errp, "can't create shm backend with size 0");
@@ -41,54 +38,13 @@ shm_backend_memory_alloc(HostMemoryBackend *backend, Error
**errp)
return false;
}
- /*
- * Let's use `mode = 0` because we don't want other processes to open our
- * memory unless we share the file descriptor with them.
- */
- mode = 0;
- oflag = O_RDWR | O_CREAT | O_EXCL;
- backend_name = host_memory_backend_get_name(backend);
-
- /*
- * Some operating systems allow creating anonymous POSIX shared memory
- * objects (e.g. FreeBSD provides the SHM_ANON constant), but this is not
- * defined by POSIX, so let's create a unique name.
- *
- * From Linux's shm_open(3) man-page:
- * For portable use, a shared memory object should be identified
- * by a name of the form /somename;"
- */
- g_string_printf(shm_name, "/qemu-" FMT_pid "-shm-%s", getpid(),
- backend_name);
-
- fd = shm_open(shm_name->str, oflag, mode);
- if (fd < 0) {
- error_setg_errno(errp, errno,
- "failed to create POSIX shared memory");
- return false;
- }
-
- /*
- * We have the file descriptor, so we no longer need to expose the
- * POSIX shared memory object. However it will remain allocated as long as
- * there are file descriptors pointing to it.
- */
- shm_unlink(shm_name->str);
-
- if (ftruncate(fd, backend->size) == -1) {
- error_setg_errno(errp, errno,
- "failed to resize POSIX shared memory to %" PRIu64,
- backend->size);
- close(fd);
- return false;
- }
-
+ /* Let's do the same as memory-backend-ram,share=on would do. */
+ name = host_memory_backend_get_name(backend);
ram_flags = RAM_SHARED;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
-
- return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend),
- backend_name, backend->size,
- ram_flags, fd, 0, errp);
+ return memory_region_init_ram_flags_nomigrate(&backend->mr,
OBJECT(backend),
+ name, backend->size,
+ ram_flags, errp);
}
static void
diff --git a/system/physmem.c b/system/physmem.c
index dc1db3a384..4d331b3828 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2057,6 +2057,59 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size,
MemoryRegion *mr,
}
#endif
+static int qemu_shm_alloc(size_t size, Error **errp)
+{
+ g_autoptr(GString) shm_name = g_string_new(NULL);
+ int fd, oflag, cur_sequence;
+ static int sequence;
+ mode_t mode;
+
+ cur_sequence = qatomic_fetch_inc(&sequence);
+
+ /*
+ * Let's use `mode = 0` because we don't want other processes to open our
+ * memory unless we share the file descriptor with them.
+ */
+ mode = 0;
+ oflag = O_RDWR | O_CREAT | O_EXCL;
+
+ /*
+ * Some operating systems allow creating anonymous POSIX shared memory
+ * objects (e.g. FreeBSD provides the SHM_ANON constant), but this is not
+ * defined by POSIX, so let's create a unique name.
+ *
+ * From Linux's shm_open(3) man-page:
+ * For portable use, a shared memory object should be identified
+ * by a name of the form /somename;"
+ */
+ g_string_printf(shm_name, "/qemu-" FMT_pid "-shm-%d", getpid(),
+ cur_sequence);
+
+ fd = shm_open(shm_name->str, oflag, mode);
+ if (fd < 0) {
+ error_setg_errno(errp, errno,
+ "failed to create POSIX shared memory");
+ return false;
+ }
+
+ /*
+ * We have the file descriptor, so we no longer need to expose the
+ * POSIX shared memory object. However it will remain allocated as long as
+ * there are file descriptors pointing to it.
+ */
+ shm_unlink(shm_name->str);
+
+ if (ftruncate(fd, size) == -1) {
+ error_setg_errno(errp, errno,
+ "failed to resize POSIX shared memory to %" PRIu64,
+ size);
+ close(fd);
+ return false;
+ }
+
+ return fd;
+}
+
static
RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
void (*resized)(const char*,
@@ -2084,12 +2137,26 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size,
ram_addr_t max_size,
new_block->used_length = size;
new_block->max_length = max_size;
assert(max_size >= size);
- new_block->fd = -1;
+
new_block->guest_memfd = -1;
new_block->page_size = qemu_real_host_page_size();
- new_block->host = host;
new_block->flags = ram_flags;
- ram_block_add(new_block, &local_err);
+ new_block->host = host;
+
+ if ((ram_flags & RAM_PREALLOC) || !(ram_flags & RAM_SHARED)) {
+ new_block->fd = -1;
+ } else {
+ /*
+ * We want anonymous shared memory, similar to MAP_SHARED|MAP_ANON; but
+ * some users want the fd. So let's allocate shm explicitly, which will
+ * give us the fd.
+ */
+ assert(!host);
+ new_block->fd = qemu_shm_alloc(new_block->max_length, &local_err);
+ }
+ if (!local_err) {
+ ram_block_add(new_block, &local_err);
+ }
if (local_err) {
g_free(new_block);
error_propagate(errp, local_err);
--
2.47.0
Then, you only need a machine option to say "anon-shared", to make all
anonymous memory shareable between processes. All it would do is to set
the RAM_SHARED flag in qemu_ram_alloc_internal() when reasonable
(!(ram_flags & RAM_PREALLOC)).
To handle "memory-backend-ram,share=off", can we find a way to bail out if
memory-backend-ram,share=off was used while the machine option "anon-shared"
would be active? Or just document that the "anon-shared" will win?
Alternatives might be a RAM_PFORCE_PRIVATE flag, set by the memory backend.
With the above change, we could drop the "bool share" flag from
qemu_anon_ram_alloc(), as it would be unused.
[1]
https://patchwork.kernel.org/project/qemu-devel/patch/20180201205511.19198-2-mar...@redhat.com/
--
Cheers,
David / dhildenb