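The memory policy setting has the following format: {membind|interleave|preferred}=[+|!]{all|N-N} A single host node N may be given instead of a range N-N. The optional "+" prefix marks the host node set as relative (NODE_HOST_RELATIVE), and the optional "!" prefix selects every host node except those in the given range. This setting is added as a suboption of "-numa", so the memory policy can be set as follows: -numa node,nodeid=0,mem=1024,cpus=0,membind=0-1 -numa node,nodeid=1,mem=1024,cpus=1,interleave=1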
Signed-off-by: Andre Przywara <andre.przyw...@amd.com> Signed-off-by: Wanlong Gao <gaowanl...@cn.fujitsu.com> --- include/sysemu/sysemu.h | 8 ++++++ vl.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 70fd2ed..993b8e0 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -130,10 +130,18 @@ extern QEMUClock *rtc_clock; #define MAX_NODES 64 #define MAX_CPUMASK_BITS 255 +#define NODE_HOST_NONE 0x00 +#define NODE_HOST_BIND 0x01 +#define NODE_HOST_INTERLEAVE 0x02 +#define NODE_HOST_PREFERRED 0x03 +#define NODE_HOST_POLICY_MASK 0x03 +#define NODE_HOST_RELATIVE 0x04 extern int nb_numa_nodes; struct node_info { uint64_t node_mem; DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); + DECLARE_BITMAP(host_mem, MAX_CPUMASK_BITS); + unsigned int flags; }; extern struct node_info numa_info[MAX_NODES]; diff --git a/vl.c b/vl.c index 42dec5e..ada9fb2 100644 --- a/vl.c +++ b/vl.c @@ -1348,11 +1348,68 @@ error: exit(1); } +static unsigned int numa_node_parse_mpol(const char *str, unsigned long *bm) +{ + unsigned long long value, endvalue; + char *endptr; + unsigned int flags = 0; + + if (str[0] == '!') { + flags |= 2; + bitmap_fill(bm, MAX_CPUMASK_BITS); + str++; + } + if (str[0] == '+') { + flags |= 1; + str++; + } + + if (!strcmp(str, "all")) { + bitmap_fill(bm, MAX_CPUMASK_BITS); + return flags; + } + + if (parse_uint(str, &value, &endptr, 10) < 0) + goto error; + if (*endptr == '-') { + if (parse_uint_full(endptr + 1, &endvalue, 10) < 0) { + goto error; + } + } else if (*endptr == '\0') { + endvalue = value; + } else { + goto error; + } + + if (endvalue >= MAX_CPUMASK_BITS) { + endvalue = MAX_CPUMASK_BITS - 1; + fprintf(stderr, + "qemu: NUMA: A max of %d host nodes are supported\n", + MAX_CPUMASK_BITS); + } + + if (endvalue < value) { + goto error; + } + + if (flags & 2) + bitmap_clear(bm, value, endvalue - value + 1); + else + bitmap_set(bm, value, endvalue - 
value + 1); + + return flags; + +error: + fprintf(stderr, "qemu: Invalid host NUMA nodes range: %s\n", str); + return 4; +} + static void numa_add(const char *optarg) { char option[128]; char *endptr; unsigned long long nodenr; + unsigned int ret; optarg = get_opt_name(option, 128, optarg, ','); if (*optarg == ',') { @@ -1393,6 +1450,23 @@ static void numa_add(const char *optarg) if (get_param_value(option, 128, "cpus", optarg) != 0) { numa_node_parse_cpus(nodenr, option); } + + option[0] = 0; + if (get_param_value(option, 128, "interleave", optarg) != 0) + numa_info[nodenr].flags |= NODE_HOST_INTERLEAVE; + else if (get_param_value(option, 128, "preferred", optarg) != 0) + numa_info[nodenr].flags |= NODE_HOST_PREFERRED; + else if (get_param_value(option, 128, "membind", optarg) != 0) + numa_info[nodenr].flags |= NODE_HOST_BIND; + if (option[0] != 0) { + ret = numa_node_parse_mpol(option, numa_info[nodenr].host_mem); + if (ret == 4) { + exit(1); + } else if (ret & 1) { + numa_info[nodenr].flags |= NODE_HOST_RELATIVE; + } + } + nb_numa_nodes++; } else { fprintf(stderr, "Invalid -numa option: %s\n", option); @@ -2922,6 +2996,8 @@ int main(int argc, char **argv, char **envp) for (i = 0; i < MAX_NODES; i++) { numa_info[i].node_mem = 0; bitmap_zero(numa_info[i].node_cpu, MAX_CPUMASK_BITS); + bitmap_zero(numa_info[i].host_mem, MAX_CPUMASK_BITS); + numa_info[i].flags = NODE_HOST_NONE; } nb_numa_nodes = 0; -- 1.8.3.rc2.10.g0c2b1cf