The memory policy setting format is:

    mem-policy={membind|interleave|preferred},mem-hostnode=[+|!]{all|N[-N]}

This setting is added as a suboption of "-numa", so the memory policy
can be set as follows:

    -numa node,nodeid=0,mem=1024,cpus=0,mem-policy=membind,mem-hostnode=0-1
    -numa node,nodeid=1,mem=1024,cpus=1,mem-policy=interleave,mem-hostnode=!1

Reviewed-by: Bandan Das <b...@redhat.com> Signed-off-by: Andre Przywara <andre.przyw...@amd.com> Signed-off-by: Wanlong Gao <gaowanl...@cn.fujitsu.com> --- include/sysemu/sysemu.h | 8 ++++ vl.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 70fd2ed..993b8e0 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -130,10 +130,18 @@ extern QEMUClock *rtc_clock; #define MAX_NODES 64 #define MAX_CPUMASK_BITS 255 +#define NODE_HOST_NONE 0x00 +#define NODE_HOST_BIND 0x01 +#define NODE_HOST_INTERLEAVE 0x02 +#define NODE_HOST_PREFERRED 0x03 +#define NODE_HOST_POLICY_MASK 0x03 +#define NODE_HOST_RELATIVE 0x04 extern int nb_numa_nodes; struct node_info { uint64_t node_mem; DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); + DECLARE_BITMAP(host_mem, MAX_CPUMASK_BITS); + unsigned int flags; }; extern struct node_info numa_info[MAX_NODES]; diff --git a/vl.c b/vl.c index 5207b8e..495b3a8 100644 --- a/vl.c +++ b/vl.c @@ -536,6 +536,14 @@ static QemuOptsList qemu_numa_opts = { .name = "cpus", .type = QEMU_OPT_STRING, .help = "cpu number or range" + },{ + .name = "mem-policy", + .type = QEMU_OPT_STRING, + .help = "memory policy" + },{ + .name = "mem-hostnode", + .type = QEMU_OPT_STRING, + .help = "host node number or range for memory policy" }, { /* end of list */ } }, @@ -1374,6 +1382,79 @@ error: exit(1); } +static void numa_node_parse_mpol(int nodenr, const char *mpol) +{ + if (!mpol) { + return; + } + + if (!strcmp(mpol, "interleave")) { + numa_info[nodenr].flags |= NODE_HOST_INTERLEAVE; + } else if (!strcmp(mpol, "preferred")) { + numa_info[nodenr].flags |= NODE_HOST_PREFERRED; + } else if (!strcmp(mpol, "membind")) { + numa_info[nodenr].flags |= NODE_HOST_BIND; + } else { + fprintf(stderr, "qemu: Invalid memory policy: %s\n", mpol); + } +} + +static void numa_node_parse_hostnode(int nodenr, const char *hostnode) +{ + unsigned long long value, endvalue; + 
char *endptr; + bool clear = false; + unsigned long *bm = numa_info[nodenr].host_mem; + + if (hostnode[0] == '!') { + clear = true; + bitmap_fill(bm, MAX_CPUMASK_BITS); + hostnode++; + } + if (hostnode[0] == '+') { + numa_info[nodenr].flags |= NODE_HOST_RELATIVE; + hostnode++; + } + + if (!strcmp(hostnode, "all")) { + bitmap_fill(bm, MAX_CPUMASK_BITS); + return; + } + + if (parse_uint(hostnode, &value, &endptr, 10) < 0) + goto error; + if (*endptr == '-') { + if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) { + goto error; + } + } else if (*endptr == '\0') { + endvalue = value; + } else { + goto error; + } + + if (endvalue >= MAX_CPUMASK_BITS) { + endvalue = MAX_CPUMASK_BITS - 1; + fprintf(stderr, + "qemu: NUMA: A max of %d host nodes are supported\n", + MAX_CPUMASK_BITS); + } + + if (endvalue < value) { + goto error; + } + + if (clear) + bitmap_clear(bm, value, endvalue - value + 1); + else + bitmap_set(bm, value, endvalue - value + 1); + + return; + +error: + fprintf(stderr, "qemu: Invalid host NUMA nodes range: %s\n", hostnode); + return; +} static int numa_add_cpus(const char *name, const char *value, void *opaque) { @@ -1385,6 +1466,25 @@ static int numa_add_cpus(const char *name, const char *value, void *opaque) return 0; } +static int numa_add_mpol(const char *name, const char *value, void *opaque) +{ + int *nodenr = opaque; + + if (!strcmp(name, "mem-policy")) { + numa_node_parse_mpol(*nodenr, value); + } + return 0; +} + +static int numa_add_hostnode(const char *name, const char *value, void *opaque) +{ + int *nodenr = opaque; + if (!strcmp(name, "mem-hostnode")) { + numa_node_parse_hostnode(*nodenr, value); + } + return 0; +} + static int numa_init_func(QemuOpts *opts, void *opaque) { uint64_t nodenr, mem_size; @@ -1404,6 +1504,14 @@ static int numa_init_func(QemuOpts *opts, void *opaque) return -1; } + if (qemu_opt_foreach(opts, numa_add_mpol, &nodenr, 1) < 0) { + return -1; + } + + if (qemu_opt_foreach(opts, numa_add_hostnode, &nodenr, 1) < 0) { + 
return -1; + } + return 0; } @@ -2962,6 +3070,8 @@ int main(int argc, char **argv, char **envp) for (i = 0; i < MAX_NODES; i++) { numa_info[i].node_mem = 0; bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS); + bitmap_zero(numa_info[i].host_mem, MAX_CPUMASK_BITS); + numa_info[i].flags = NODE_HOST_NONE; } nb_numa_nodes = 0; -- 1.8.3.2.634.g7a3187e