On Mon, Mar 07, 2016 at 02:49:06PM +1100, David Gibson wrote: > On Fri, Mar 04, 2016 at 12:24:19PM +0530, Bharata B Rao wrote: > > Set up device tree entries for the hotplugged CPU core and use the > > exising EPOW event infrastructure to send CPU hotplug notification to > > the guest. > > > > Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com> > > --- > > hw/ppc/spapr.c | 73 > > ++++++++++++++++++++++++++++++++++++++++- > > hw/ppc/spapr_cpu_core.c | 60 +++++++++++++++++++++++++++++++++ > > hw/ppc/spapr_events.c | 3 ++ > > hw/ppc/spapr_rtas.c | 24 ++++++++++++++ > > include/hw/ppc/spapr.h | 4 +++ > > include/hw/ppc/spapr_cpu_core.h | 2 ++ > > 6 files changed, 165 insertions(+), 1 deletion(-) > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > index 5acb612..6c4ac50 100644 > > --- a/hw/ppc/spapr.c > > +++ b/hw/ppc/spapr.c > > @@ -603,6 +603,18 @@ static void spapr_populate_cpu_dt(CPUState *cs, void > > *fdt, int offset, > > size_t page_sizes_prop_size; > > uint32_t vcpus_per_socket = smp_threads * smp_cores; > > uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; > > + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); > > + sPAPRDRConnector *drc; > > + sPAPRDRConnectorClass *drck; > > + int drc_index; > > + > > + if (smc->dr_cpu_enabled) { > > + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index); > > + g_assert(drc); > > + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > + drc_index = drck->get_index(drc); > > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", > > drc_index))); > > + } > > > > /* Note: we keep CI large pages off for now because a 64K capable guest > > * provisioned with large pages might otherwise try to map a qemu > > @@ -987,6 +999,16 @@ static void spapr_finalize_fdt(sPAPRMachineState > > *spapr, > > _FDT(spapr_drc_populate_dt(fdt, 0, NULL, > > SPAPR_DR_CONNECTOR_TYPE_LMB)); > > } > > > > + if (smc->dr_cpu_enabled) { > > + int offset = fdt_path_offset(fdt, "/cpus"); > > + ret = 
spapr_drc_populate_dt(fdt, offset, NULL, > > + SPAPR_DR_CONNECTOR_TYPE_CPU); > > + if (ret < 0) { > > + error_report("Couldn't set up CPU DR device tree properties"); > > + exit(1); > > + } > > + } > > + > > _FDT((fdt_pack(fdt))); > > > > if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { > > @@ -1181,7 +1203,7 @@ static void ppc_spapr_reset(void) > > > > } > > > > -static void spapr_cpu_reset(void *opaque) > > +void spapr_cpu_reset(void *opaque) > > { > > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > PowerPCCPU *cpu = opaque; > > @@ -1622,6 +1644,8 @@ static void spapr_boot_set(void *opaque, const char > > *boot_device, > > void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error > > **errp) > > { > > CPUPPCState *env = &cpu->env; > > + CPUState *cs = CPU(cpu); > > + int i; > > > > /* Set time-base frequency to 512 MHz */ > > cpu_ppc_tb_init(env, TIMEBASE_FREQ); > > @@ -1646,6 +1670,14 @@ void spapr_cpu_init(sPAPRMachineState *spapr, > > PowerPCCPU *cpu, Error **errp) > > } > > } > > > > + /* Set NUMA node for the added CPUs */ > > + for (i = 0; i < nb_numa_nodes; i++) { > > + if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) { > > + cs->numa_node = i; > > + break; > > + } > > + } > > + > > This hunk seems like it belongs in a different patch.
It appears that this would also be needed by other archs to set the NUMA node for the hot-plugged CPU. How about making an API out of this and using it, something like below? Igor? ------------------------------------------------------------------- diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 0aeefd2..8347234 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1112,6 +1112,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp) error_propagate(errp, local_err); return; } + numa_set_cpu(CPU(cpu)); object_unref(OBJECT(cpu)); } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a42f8c0..f2b3b67 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1645,7 +1645,6 @@ void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error **errp) { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); - int i; /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, TIMEBASE_FREQ); @@ -1671,12 +1670,7 @@ void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error **errp) } /* Set NUMA node for the added CPUs */ - for (i = 0; i < nb_numa_nodes; i++) { - if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) { - cs->numa_node = i; - break; - } - } + numa_set_cpu(cs); xics_cpu_setup(spapr->icp, cpu); qemu_register_reset(spapr_cpu_reset, cpu); diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index bb184c9..648d68b 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -31,5 +31,6 @@ extern QemuOptsList qemu_numa_opts; void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); uint32_t numa_get_node(ram_addr_t addr, Error **errp); +void numa_set_cpu(CPUState *cpu); #endif diff --git a/numa.c b/numa.c index 4c4f7f5..1b47c15 100644 --- a/numa.c +++ b/numa.c @@ -396,20 +396,32 @@ void parse_numa_opts(MachineClass *mc) } } +static void numa_set_cpu_numa_node(CPUState *cpu) +{ + int i; + + for (i = 0; i < nb_numa_nodes; i++) { + if (test_bit(cpu->cpu_index, 
numa_info[i].node_cpu)) { + cpu->numa_node = i; + break; + } + } +} + void numa_post_machine_init(void) { CPUState *cpu; - int i; CPU_FOREACH(cpu) { - for (i = 0; i < nb_numa_nodes; i++) { - if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) { - cpu->numa_node = i; - } - } + numa_set_cpu_numa_node(cpu); } } +void numa_set_cpu(CPUState *cpu) +{ + numa_set_cpu_numa_node(cpu); +} + static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, const char *name, uint64_t ram_size) ------------------------------------------------------------------- > > > xics_cpu_setup(spapr->icp, cpu); > > qemu_register_reset(spapr_cpu_reset, cpu); > > } > > @@ -1768,6 +1800,7 @@ static void ppc_spapr_init(MachineState *machine) > > char *filename; > > int spapr_cores = smp_cpus / smp_threads; > > int spapr_max_cores = max_cpus / smp_threads; > > + int smt = kvmppc_smt_threads(); > > > > if (smp_cpus % smp_threads) { > > error_report("smp_cpus (%u) must be multiple of threads (%u)", > > @@ -1834,6 +1867,15 @@ static void ppc_spapr_init(MachineState *machine) > > spapr_validate_node_memory(machine, &error_fatal); > > } > > > > + if (smc->dr_cpu_enabled) { > > + for (i = 0; i < spapr_max_cores; i++) { > > + sPAPRDRConnector *drc = > > + spapr_dr_connector_new(OBJECT(spapr), > > + SPAPR_DR_CONNECTOR_TYPE_CPU, i * > > smt); > > + qemu_register_reset(spapr_drc_reset, drc); > > + } > > + } > > + > > Nit: would this be cleaner to include in the same loop that constructs > the (empty) links and boot-time cpu cores? Seems possible, will change. > > > /* init CPUs */ > > if (machine->cpu_model == NULL) { > > machine->cpu_model = kvm_enabled() ? 
"host" : "POWER7"; > > @@ -2267,6 +2309,27 @@ out: > > error_propagate(errp, local_err); > > } > > > > +void *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs, > > + int *fdt_offset, sPAPRMachineState > > *spapr) > > +{ > > + PowerPCCPU *cpu = POWERPC_CPU(cs); > > + DeviceClass *dc = DEVICE_GET_CLASS(cs); > > + int id = ppc_get_vcpu_dt_id(cpu); > > + void *fdt; > > + int offset, fdt_size; > > + char *nodename; > > + > > + fdt = create_device_tree(&fdt_size); > > + nodename = g_strdup_printf("%s@%x", dc->fw_name, id); > > + offset = fdt_add_subnode(fdt, 0, nodename); > > + > > + spapr_populate_cpu_dt(cs, fdt, offset, spapr); > > + g_free(nodename); > > + > > + *fdt_offset = offset; > > + return fdt; > > +} > > + > > static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > > DeviceState *dev, Error **errp) > > { > > @@ -2307,6 +2370,12 @@ static void spapr_machine_device_plug(HotplugHandler > > *hotplug_dev, > > } > > > > spapr_memory_plug(hotplug_dev, dev, node, errp); > > + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { > > + if (!smc->dr_cpu_enabled && dev->hotplugged) { > > + error_setg(errp, "CPU hotplug not supported for this machine"); > > + return; > > + } > > + spapr_core_plug(hotplug_dev, dev, errp); > > } > > } > > > > @@ -2366,6 +2435,7 @@ static void spapr_machine_class_init(ObjectClass *oc, > > void *data) > > mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; > > > > smc->dr_lmb_enabled = true; > > + smc->dr_cpu_enabled = true; > > fwc->get_dev_path = spapr_get_fw_dev_path; > > nc->nmi_monitor_handler = spapr_nmi; > > } > > @@ -2445,6 +2515,7 @@ static void > > spapr_machine_2_5_class_options(MachineClass *mc) > > > > spapr_machine_2_6_class_options(mc); > > smc->use_ohci_by_default = true; > > + smc->dr_cpu_enabled = false; > > SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_5); > > } > > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c > > index 9ddf3ce..4c233d7 100644 > > --- 
a/hw/ppc/spapr_cpu_core.c > > +++ b/hw/ppc/spapr_cpu_core.c > > @@ -14,6 +14,65 @@ > > #include "qapi/visitor.h" > > #include <sysemu/cpus.h> > > > > +void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > > + Error **errp) > > +{ > > + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); > > + sPAPRMachineState *ms = SPAPR_MACHINE(qdev_get_machine()); > > + sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev)); > > + PowerPCCPU *cpu = &core->threads[0]; > > + CPUState *cs = CPU(cpu); > > + int id = ppc_get_vcpu_dt_id(cpu); > > + sPAPRDRConnector *drc = > > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id); > > + sPAPRDRConnectorClass *drck; > > + Error *local_err = NULL; > > + void *fdt = NULL; > > + int fdt_offset = 0; > > + > > + if (!smc->dr_cpu_enabled) { > > + /* > > + * This is a cold plugged CPU core but the machine doesn't support > > + * DR. So skip the hotplug path ensuring that the core is brought > > + * up online with out an associated DR connector. > > + */ > > + return; > > + } > > + > > + g_assert(drc); > > + > > + /* > > + * Setup CPU DT entries only for hotplugged CPUs. For boot time or > > + * coldplugged CPUs DT entries are setup in spapr_finalize_fdt(). > > + */ > > + if (dev->hotplugged) { > > + fdt = spapr_populate_hotplug_cpu_dt(dev, cs, &fdt_offset, ms); > > + dev->hotplugged = true; > > + } > > + > > + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > + drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, &local_err); > > + if (local_err) { > > + g_free(fdt); > > + error_propagate(errp, local_err); > > + return; > > + } > > + > > + if (dev->hotplugged) { > > + /* > > + * Send hotplug notification interrupt to the guest only in case > > + * of hotplugged CPUs. > > + */ > > + spapr_hotplug_req_add_by_index(drc); > > + } else { > > + /* > > + * Set the right DRC states for cold plugged CPU. 
> > + */ > > + drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE); > > + drck->set_isolation_state(drc, > > SPAPR_DR_ISOLATION_STATE_UNISOLATED); > > + } > > +} > > + > > static int spapr_cpu_core_realize_child(Object *child, void *opaque) > > { > > Error **errp = opaque; > > @@ -30,6 +89,7 @@ static int spapr_cpu_core_realize_child(Object *child, > > void *opaque) > > if (*errp) { > > return 1; > > } > > + spapr_cpu_reset(cpu); > > This also looks like it belongs in a different patch. Do you mean a separate patch for this, or should I move it into an existing patch of the series? Regards, Bharata.