From: Paolo Bonzini <pbonz...@redhat.com>

This option ("-numa node,memdev=") provides the infrastructure for binding
guest NUMA nodes to host NUMA nodes.  For example:
-object memory-ram,size=1024M,policy=membind,host-nodes=0,id=ram-node0 \
-numa node,nodeid=0,cpus=0,memdev=ram-node0 \
-object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
-numa node,nodeid=1,cpus=1,memdev=ram-node1

The option replaces "-numa mem".

Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
Signed-off-by: Hu Tao <hu...@cn.fujitsu.com>

[don't include sysemu/hostmem.h in sysemu/sysemu.h to avoid mutual inclusion.]
---
Note: numa_node_parse() returns -1 when memdev= is given for only some of
the nodes, so a mixed configuration is rejected at parse time rather than
reaching memory_region_allocate_system_memory() with a NULL node_memdev.

 include/sysemu/sysemu.h |  3 +++
 numa.c                  | 66 +++++++++++++++++++++++++++++++++++++++++++++++--
 qapi-schema.json        |  6 ++++-
 3 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index e9da760..40f1df9 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -137,9 +137,12 @@ extern QEMUClockType rtc_clock;
 #define MAX_NODES 128
 #define MAX_CPUMASK_BITS 255
 extern int nb_numa_nodes;
+struct HostMemoryBackend;
+typedef struct HostMemoryBackend HostMemoryBackend;
 typedef struct node_info {
     uint64_t node_mem;
     DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
+    HostMemoryBackend *node_memdev;
 } NodeInfo;
 extern NodeInfo numa_info[MAX_NODES];
 void set_numa_nodes(void);
diff --git a/numa.c b/numa.c
index 6441d14..8d05078 100644
--- a/numa.c
+++ b/numa.c
@@ -24,9 +24,12 @@
  */
 
 #include "sysemu/sysemu.h"
+#include "sysemu/hostmem.h"
 #include "qapi-visit.h"
 #include "qapi/opts-visitor.h"
 #include "qapi/dealloc-visitor.h"
+#include "qapi/qmp/qerror.h"
+
 QemuOptsList qemu_numa_opts = {
     .name = "numa",
     .implied_opt_name = "type",
@@ -34,10 +37,13 @@ QemuOptsList qemu_numa_opts = {
     .desc = { { 0 } } /* validated with OptsVisitor */
 };
 
+static int have_memdevs = -1;
+
 static int numa_node_parse(NumaNodeOptions *node, QemuOpts *opts)
 {
     uint16_t nodenr;
     uint16List *cpus = NULL;
+    Error *local_err = NULL;
 
     if (node->has_nodeid) {
         nodenr = node->nodeid;
@@ -60,6 +66,20 @@ static int numa_node_parse(NumaNodeOptions *node, QemuOpts *opts)
         bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
     }
 
+    if (node->has_mem && node->has_memdev) {
+        fprintf(stderr, "qemu: cannot specify both mem= and memdev=\n");
+        return -1;
+    }
+
+    if (have_memdevs == -1) {
+        have_memdevs = node->has_memdev;
+    }
+
+    if (node->has_memdev != have_memdevs) {
+        fprintf(stderr, "qemu: memdev option must be specified for either "
+                "all or no nodes\n");
+        return -1;
+    }
     if (node->has_mem) {
         uint64_t mem_size = node->mem;
         const char *mem_str = qemu_opt_get(opts, "mem");
@@ -69,7 +89,19 @@ static int numa_node_parse(NumaNodeOptions *node, QemuOpts *opts)
         }
         numa_info[nodenr].node_mem = mem_size;
     }
+    if (node->has_memdev) {
+        Object *o;
+        o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL);
+        if (!o) {
+            error_setg(&local_err, "memdev=%s is ambiguous", node->memdev);
+            qerror_report_err(local_err);
+            return -1;
+        }
+
+        object_ref(o);
+        numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL);
+        numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
+    }
 
     return 0;
 }
@@ -188,12 +220,42 @@ void set_numa_modes(void)
     }
 }
 
+static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
+                                           const char *name,
+                                           QEMUMachineInitArgs *args)
+{
+    uint64_t ram_size = args->ram_size;
+
+    memory_region_init_ram(mr, owner, name, ram_size);
+    vmstate_register_ram_global(mr);
+}
+
 void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
                                           const char *name,
                                           QEMUMachineInitArgs *args)
 {
     uint64_t ram_size = args->ram_size;
+    uint64_t addr = 0;
+    int i;
 
-    memory_region_init_ram(mr, owner, name, ram_size);
-    vmstate_register_ram_global(mr);
+    if (nb_numa_nodes == 0 || !have_memdevs) {
+        allocate_system_memory_nonnuma(mr, owner, name, args);
+        return;
+    }
+
+    memory_region_init(mr, owner, name, ram_size);
+    for (i = 0; i < nb_numa_nodes; i++) {
+        Error *local_err = NULL;
+        uint64_t size = numa_info[i].node_mem;
+        HostMemoryBackend *backend = numa_info[i].node_memdev;
+        MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err);
+        if (local_err) {
+            qerror_report_err(local_err);
+            exit(1);
+        }
+
+        memory_region_add_subregion(mr, addr, seg);
+        vmstate_register_ram_global(seg);
+        addr += size;
+    }
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index 951cc0a..62e0b83 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4542,6 +4542,9 @@
 #
 # @mem: #optional memory size of this node (equally divide total memory among
 #       nodes if omitted)
+# @memdev: #optional memory backend object.  If specified for one node,
+#          it must be specified for all nodes.
+#
 #
 # Since: 2.1
 ##
@@ -4549,4 +4552,5 @@
   'data': {
    '*nodeid': 'uint16',
    '*cpus':   ['uint16'],
-   '*mem':    'size' }}
+   '*mem':    'size',
+   '*memdev': 'str' }}
-- 
1.8.5.2.229.g4448466