Hi, First of all, sorry for taking more than a month to start reviewing this.
On Fri, Sep 06, 2019 at 07:11:43PM +0000, Moger, Babu wrote:
> To support new epyc mode, we need to know the number of numa nodes
> in advance to generate apic id correctly. [...]

This explains that we need to initialize numa_info earlier than
something...

> [...] So, split the numa
> initialization into two. The function parse_numa initializes numa_info
> and updates nb_numa_nodes. And then parse_numa_node does the numa node
> initialization.

...but I'm missing what that "something" is. The sequence of events here
will be:

* parse_numa_opts()
  * for each -numa option:
    * parse_numa()
      * set_numa_options()
        * parse_numa_info()
          * here ms->numa_state->num_nodes is incremented [1]
* parse_numa_node_opts()
  * for each -numa option:
    * parse_numa_node()
      * set_numa_node_options()
        * here are the operations that are being delayed by this patch [2]

What exactly makes it necessary for [2] to happen after [1] is done for
all NUMA nodes? This needs to be clear in the code, otherwise somebody
will try to refactor this in the future and merge set_numa_node_options()
and parse_numa_info() again, not knowing why ordering between [1] and [2]
is so important.

In addition to documenting it better, I suggest saving the CPU index list
in NodeInfo, instead of calling qemu_opts_foreach() twice. (Probably a
good idea to document the new field as internal, though. We don't want
machine-specific code to be looking at the CPU index list.)

Also, would it work if the delayed initialization is done at
numa_complete_configuration() instead of a new parse_numa_node_opts()
function? We already have 2 separate steps in NUMA initialization
(parse_numa_node() and numa_complete_configuration()), so it would be
nice to avoid adding a 3rd one.

Putting all the suggestions together, the code would look like this:

static void parse_numa_node(MachineState *ms, NumaNodeOptions *node,
                            Error **errp)
{
    /* ... */
    numa_info[nodenr].cpu_indexes = QAPI_CLONE(node->cpus, uint16List);
    /* ...
*/
}

void numa_complete_configuration(MachineState *ms)
{
    /* ... */
    for (i = 0; i < ms->numa_state->num_nodes; i++) {
        /*
         * numa_node_complete_configuration() needs to be called after all
         * nodes were already parsed, because <insert reason here>.
         */
        numa_node_complete_configuration(ms, &numa_info[i]);
    }
}

void numa_node_complete_configuration(MachineState *ms, NodeInfo *node)
{
    for (cpu_index = node->cpu_indexes; cpu_index; cpu_index = cpu_index->next) {
        CpuInstanceProperties props;
        props = mc->cpu_index_to_instance_props(ms, cpu_index->value);
        props.node_id = nodenr;
        props.has_node_id = true;
        machine_set_cpu_numa_node(ms, &props, &err);
    }
}

> > Signed-off-by: Babu Moger <babu.mo...@amd.com> > --- > hw/core/numa.c | 106 > +++++++++++++++++++++++++++++++++++-------------- > include/sysemu/numa.h | 2 + > vl.c | 2 + > 3 files changed, 80 insertions(+), 30 deletions(-) > > diff --git a/hw/core/numa.c b/hw/core/numa.c > index a11431483c..27fa6b5e1d 100644 > --- a/hw/core/numa.c > +++ b/hw/core/numa.c > @@ -55,14 +55,10 @@ bool have_numa_distance; > NodeInfo numa_info[MAX_NODES]; > > > -static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, > +static void parse_numa_info(MachineState *ms, NumaNodeOptions *node, > Error **errp) > { > - Error *err = NULL; > uint16_t nodenr; > - uint16List *cpus = NULL; > - MachineClass *mc = MACHINE_GET_CLASS(ms); > - unsigned int max_cpus = ms->smp.max_cpus; > > if (node->has_nodeid) { > nodenr = node->nodeid; > @@ -81,29 +77,6 @@ static void parse_numa_node(MachineState *ms, > NumaNodeOptions *node, > return; > } > > - if (!mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id) { > - error_setg(errp, "NUMA is not supported by this machine-type"); > - return; > - } > - for (cpus = node->cpus; cpus; cpus = cpus->next) { > - CpuInstanceProperties props; > - if (cpus->value >= max_cpus) { > - error_setg(errp, > - "CPU index (%" PRIu16 ")" > - " should be smaller than maxcpus (%d)", > - cpus->value, max_cpus); > - return; >
- } > - props = mc->cpu_index_to_instance_props(ms, cpus->value); > - props.node_id = nodenr; > - props.has_node_id = true; > - machine_set_cpu_numa_node(ms, &props, &err); > - if (err) { > - error_propagate(errp, err); > - return; > - } > - } > - > have_memdevs = have_memdevs ? : node->has_memdev; > have_mem = have_mem ? : node->has_mem; > if ((node->has_mem && have_memdevs) || (node->has_memdev && have_mem)) { > @@ -177,7 +150,7 @@ void set_numa_options(MachineState *ms, NumaOptions > *object, Error **errp) > > switch (object->type) { > case NUMA_OPTIONS_TYPE_NODE: > - parse_numa_node(ms, &object->u.node, &err); > + parse_numa_info(ms, &object->u.node, &err); > if (err) { > goto end; > } > @@ -242,6 +215,73 @@ end: > return 0; > } > > +void set_numa_node_options(MachineState *ms, NumaOptions *object, Error > **errp) > +{ > + MachineClass *mc = MACHINE_GET_CLASS(ms); > + NumaNodeOptions *node = &object->u.node; > + unsigned int max_cpus = ms->smp.max_cpus; > + uint16List *cpus = NULL; > + Error *err = NULL; > + uint16_t nodenr; > + > + if (node->has_nodeid) { > + nodenr = node->nodeid; > + } else { > + error_setg(errp, "NUMA node information is not available"); > + } > + > + if (!mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id) { > + error_setg(errp, "NUMA is not supported by this machine-type"); > + return; > + } > + > + for (cpus = node->cpus; cpus; cpus = cpus->next) { > + CpuInstanceProperties props; > + if (cpus->value >= max_cpus) { > + error_setg(errp, > + "CPU index (%" PRIu16 ")" > + " should be smaller than maxcpus (%d)", > + cpus->value, max_cpus); > + return; > + } > + props = mc->cpu_index_to_instance_props(ms, cpus->value); > + props.node_id = nodenr; > + props.has_node_id = true; > + machine_set_cpu_numa_node(ms, &props, &err); > + if (err) { > + error_propagate(errp, err); > + return; > + } > + } > +} > + > +static int parse_numa_node(void *opaque, QemuOpts *opts, Error **errp) > +{ > + NumaOptions *object = NULL; > + MachineState 
*ms = MACHINE(opaque); > + Error *err = NULL; > + Visitor *v = opts_visitor_new(opts); > + > + visit_type_NumaOptions(v, NULL, &object, &err); > + visit_free(v); > + if (err) { > + goto end; > + } > + > + if (object->type == NUMA_OPTIONS_TYPE_NODE) { > + set_numa_node_options(ms, object, &err); > + } > + > +end: > + qapi_free_NumaOptions(object); > + if (err) { > + error_propagate(errp, err); > + return -1; > + } > + > + return 0; > +} > + > /* If all node pair distances are symmetric, then only distances > * in one direction are enough. If there is even one asymmetric > * pair, though, then all distances must be provided. The > @@ -368,7 +408,7 @@ void numa_complete_configuration(MachineState *ms) > if (ms->ram_slots > 0 && nb_numa_nodes == 0 && > mc->auto_enable_numa_with_memhp) { > NumaNodeOptions node = { }; > - parse_numa_node(ms, &node, &error_abort); > + parse_numa_info(ms, &node, &error_abort); > } > > assert(max_numa_nodeid <= MAX_NODES); > @@ -448,6 +488,12 @@ void parse_numa_opts(MachineState *ms) > qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, &error_fatal); > } > > +void parse_numa_node_opts(MachineState *ms) > +{ > + qemu_opts_foreach(qemu_find_opts("numa"), parse_numa_node, > + ms, &error_fatal); > +} > + > void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp) > { > int node_id = object_property_get_int(OBJECT(dev), "node-id", > &error_abort); > diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h > index 01a263eba2..ca109adaa6 100644 > --- a/include/sysemu/numa.h > +++ b/include/sysemu/numa.h > @@ -24,7 +24,9 @@ struct NumaNodeMem { > extern NodeInfo numa_info[MAX_NODES]; > > void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp); > +void set_numa_node_options(MachineState *ms, NumaOptions *object, Error > **errp); > void parse_numa_opts(MachineState *ms); > +void parse_numa_node_opts(MachineState *ms); > void numa_complete_configuration(MachineState *ms); > void 
query_numa_node_mem(NumaNodeMem node_mem[]); > extern QemuOptsList qemu_numa_opts; > diff --git a/vl.c b/vl.c > index b426b32134..711d2ae5da 100644 > --- a/vl.c > +++ b/vl.c > @@ -4339,6 +4339,8 @@ int main(int argc, char **argv, char **envp) > } > parse_numa_opts(current_machine); > > + parse_numa_node_opts(current_machine); > + > /* do monitor/qmp handling at preconfig state if requested */ > main_loop(); > > -- Eduardo