On Tue, 8 Mar 2016 15:27:39 +1100 David Gibson <da...@gibson.dropbear.id.au> wrote:
> On Mon, Mar 07, 2016 at 11:59:42AM +0530, Bharata B Rao wrote: > > On Mon, Mar 07, 2016 at 02:49:06PM +1100, David Gibson wrote: > > > On Fri, Mar 04, 2016 at 12:24:19PM +0530, Bharata B Rao wrote: > > > > Set up device tree entries for the hotplugged CPU core and use the > > > > exising EPOW event infrastructure to send CPU hotplug notification to > > > > the guest. > > > > > > > > Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com> > > > > --- > > > > hw/ppc/spapr.c | 73 > > > > ++++++++++++++++++++++++++++++++++++++++- > > > > hw/ppc/spapr_cpu_core.c | 60 +++++++++++++++++++++++++++++++++ > > > > hw/ppc/spapr_events.c | 3 ++ > > > > hw/ppc/spapr_rtas.c | 24 ++++++++++++++ > > > > include/hw/ppc/spapr.h | 4 +++ > > > > include/hw/ppc/spapr_cpu_core.h | 2 ++ > > > > 6 files changed, 165 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > > > index 5acb612..6c4ac50 100644 > > > > --- a/hw/ppc/spapr.c > > > > +++ b/hw/ppc/spapr.c > > > > @@ -603,6 +603,18 @@ static void spapr_populate_cpu_dt(CPUState *cs, > > > > void *fdt, int offset, > > > > size_t page_sizes_prop_size; > > > > uint32_t vcpus_per_socket = smp_threads * smp_cores; > > > > uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; > > > > + sPAPRMachineClass *smc = > > > > SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); > > > > + sPAPRDRConnector *drc; > > > > + sPAPRDRConnectorClass *drck; > > > > + int drc_index; > > > > + > > > > + if (smc->dr_cpu_enabled) { > > > > + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, > > > > index); > > > > + g_assert(drc); > > > > + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > > > + drc_index = drck->get_index(drc); > > > > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", > > > > drc_index))); > > > > + } > > > > > > > > /* Note: we keep CI large pages off for now because a 64K capable > > > > guest > > > > * provisioned with large pages might otherwise try to map a qemu > > > > @@ -987,6 
+999,16 @@ static void spapr_finalize_fdt(sPAPRMachineState > > > > *spapr, > > > > _FDT(spapr_drc_populate_dt(fdt, 0, NULL, > > > > SPAPR_DR_CONNECTOR_TYPE_LMB)); > > > > } > > > > > > > > + if (smc->dr_cpu_enabled) { > > > > + int offset = fdt_path_offset(fdt, "/cpus"); > > > > + ret = spapr_drc_populate_dt(fdt, offset, NULL, > > > > + SPAPR_DR_CONNECTOR_TYPE_CPU); > > > > + if (ret < 0) { > > > > + error_report("Couldn't set up CPU DR device tree > > > > properties"); > > > > + exit(1); > > > > + } > > > > + } > > > > + > > > > _FDT((fdt_pack(fdt))); > > > > > > > > if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { > > > > @@ -1181,7 +1203,7 @@ static void ppc_spapr_reset(void) > > > > > > > > } > > > > > > > > -static void spapr_cpu_reset(void *opaque) > > > > +void spapr_cpu_reset(void *opaque) > > > > { > > > > sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > > > > PowerPCCPU *cpu = opaque; > > > > @@ -1622,6 +1644,8 @@ static void spapr_boot_set(void *opaque, const > > > > char *boot_device, > > > > void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error > > > > **errp) > > > > { > > > > CPUPPCState *env = &cpu->env; > > > > + CPUState *cs = CPU(cpu); > > > > + int i; > > > > > > > > /* Set time-base frequency to 512 MHz */ > > > > cpu_ppc_tb_init(env, TIMEBASE_FREQ); > > > > @@ -1646,6 +1670,14 @@ void spapr_cpu_init(sPAPRMachineState *spapr, > > > > PowerPCCPU *cpu, Error **errp) > > > > } > > > > } > > > > > > > > + /* Set NUMA node for the added CPUs */ > > > > + for (i = 0; i < nb_numa_nodes; i++) { > > > > + if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) { > > > > + cs->numa_node = i; > > > > + break; > > > > + } > > > > + } > > > > + > > > > > > This hunk seems like it belongs in a different patch. > > > > It appears that this would be needed by other archs also to set the > > NUMA node for the hot-plugged CPU. How about make an API out of this > > and use this something like below ? Igor ? 
> > Is there a way we could put this in the the CPU thread initialization > itself? Rather than requiring every platform to call a helper. I'd suggest that the hotpluggable CPU entity have a 'node' property, like we have in pc-dimm. However, the machine owns the NUMA mapping, so setting it from thread initialization seems wrong. Could that be done from the machine's plug() handler (spapr_core_plug)? Also, I notice that there is no way to check/set 'address' properties at the machine level before calling cpu->realize(), which forces us to check them either inside cpu->realize() or at post-realize time in the HotplugHandler(machine)->plug() hook, followed by a full rollback in case of failure. Maybe we need to add a HotplugHandler->pre_realize() hook to allow the machine to verify/set properties of the CPU before it's realized. > > > ------------------------------------------------------------------- > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > > index 0aeefd2..8347234 100644 > > --- a/hw/i386/pc.c > > +++ b/hw/i386/pc.c > > @@ -1112,6 +1112,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp) > > error_propagate(errp, local_err); > > return; > > } > > + numa_set_cpu(CPU(cpu)); > > object_unref(OBJECT(cpu)); > > } > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > index a42f8c0..f2b3b67 100644 > > --- a/hw/ppc/spapr.c > > +++ b/hw/ppc/spapr.c > > @@ -1645,7 +1645,6 @@ void spapr_cpu_init(sPAPRMachineState *spapr, > > PowerPCCPU *cpu, Error **errp) > > { > > CPUPPCState *env = &cpu->env; > > CPUState *cs = CPU(cpu); > > - int i; > > > > /* Set time-base frequency to 512 MHz */ > > cpu_ppc_tb_init(env, TIMEBASE_FREQ); > > @@ -1671,12 +1670,7 @@ void spapr_cpu_init(sPAPRMachineState *spapr, > > PowerPCCPU *cpu, Error **errp) > > } > > > > /* Set NUMA node for the added CPUs */ > > - for (i = 0; i < nb_numa_nodes; i++) { > > - if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) { > > - cs->numa_node = i; > > - break; > > - } > > - } > > + numa_set_cpu(cs); > > > > xics_cpu_setup(spapr->icp, 
cpu); > > qemu_register_reset(spapr_cpu_reset, cpu); > > diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h > > index bb184c9..648d68b 100644 > > --- a/include/sysemu/numa.h > > +++ b/include/sysemu/numa.h > > @@ -31,5 +31,6 @@ extern QemuOptsList qemu_numa_opts; > > void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); > > void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); > > uint32_t numa_get_node(ram_addr_t addr, Error **errp); > > +void numa_set_cpu(CPUState *cpu); > > > > #endif > > diff --git a/numa.c b/numa.c > > index 4c4f7f5..1b47c15 100644 > > --- a/numa.c > > +++ b/numa.c > > @@ -396,20 +396,32 @@ void parse_numa_opts(MachineClass *mc) > > } > > } > > > > +static void numa_set_cpu_numa_node(CPUState *cpu) > > +{ > > + int i; > > + > > + for (i = 0; i < nb_numa_nodes; i++) { > > + if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) { > > + cpu->numa_node = i; > > + break; > > + } > > + } > > +} > > + > > void numa_post_machine_init(void) > > { > > CPUState *cpu; > > - int i; > > > > CPU_FOREACH(cpu) { > > - for (i = 0; i < nb_numa_nodes; i++) { > > - if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) { > > - cpu->numa_node = i; > > - } > > - } > > + numa_set_cpu_numa_node(cpu); > > } > > } > > > > +void numa_set_cpu(CPUState *cpu) > > +{ > > + numa_set_cpu_numa_node(cpu); > > +} > > + > > static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, > > const char *name, > > uint64_t ram_size) > > > > ------------------------------------------------------------------- > > > > > > > xics_cpu_setup(spapr->icp, cpu); > > > > qemu_register_reset(spapr_cpu_reset, cpu); > > > > } > > > > @@ -1768,6 +1800,7 @@ static void ppc_spapr_init(MachineState *machine) > > > > char *filename; > > > > int spapr_cores = smp_cpus / smp_threads; > > > > int spapr_max_cores = max_cpus / smp_threads; > > > > + int smt = kvmppc_smt_threads(); > > > > > > > > if (smp_cpus % smp_threads) { > > > > 
error_report("smp_cpus (%u) must be multiple of threads (%u)", > > > > @@ -1834,6 +1867,15 @@ static void ppc_spapr_init(MachineState *machine) > > > > spapr_validate_node_memory(machine, &error_fatal); > > > > } > > > > > > > > + if (smc->dr_cpu_enabled) { > > > > + for (i = 0; i < spapr_max_cores; i++) { > > > > + sPAPRDRConnector *drc = > > > > + spapr_dr_connector_new(OBJECT(spapr), > > > > + SPAPR_DR_CONNECTOR_TYPE_CPU, i > > > > * smt); > > > > + qemu_register_reset(spapr_drc_reset, drc); > > > > + } > > > > + } > > > > + > > > > > > Nit: would this be cleaner to include in the same loop that constructs > > > the (empty) links and boot-time cpu cores? > > > > Seems possible, will change. > > > > > > > > > /* init CPUs */ > > > > if (machine->cpu_model == NULL) { > > > > machine->cpu_model = kvm_enabled() ? "host" : "POWER7"; > > > > @@ -2267,6 +2309,27 @@ out: > > > > error_propagate(errp, local_err); > > > > } > > > > > > > > +void *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs, > > > > + int *fdt_offset, sPAPRMachineState > > > > *spapr) > > > > +{ > > > > + PowerPCCPU *cpu = POWERPC_CPU(cs); > > > > + DeviceClass *dc = DEVICE_GET_CLASS(cs); > > > > + int id = ppc_get_vcpu_dt_id(cpu); > > > > + void *fdt; > > > > + int offset, fdt_size; > > > > + char *nodename; > > > > + > > > > + fdt = create_device_tree(&fdt_size); > > > > + nodename = g_strdup_printf("%s@%x", dc->fw_name, id); > > > > + offset = fdt_add_subnode(fdt, 0, nodename); > > > > + > > > > + spapr_populate_cpu_dt(cs, fdt, offset, spapr); > > > > + g_free(nodename); > > > > + > > > > + *fdt_offset = offset; > > > > + return fdt; > > > > +} > > > > + > > > > static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, > > > > DeviceState *dev, Error **errp) > > > > { > > > > @@ -2307,6 +2370,12 @@ static void > > > > spapr_machine_device_plug(HotplugHandler *hotplug_dev, > > > > } > > > > > > > > spapr_memory_plug(hotplug_dev, dev, node, errp); > > > > + } else if 
(object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { > > > > + if (!smc->dr_cpu_enabled && dev->hotplugged) { > > > > + error_setg(errp, "CPU hotplug not supported for this > > > > machine"); > > > > + return; > > > > + } > > > > + spapr_core_plug(hotplug_dev, dev, errp); > > > > } > > > > } > > > > > > > > @@ -2366,6 +2435,7 @@ static void spapr_machine_class_init(ObjectClass > > > > *oc, void *data) > > > > mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; > > > > > > > > smc->dr_lmb_enabled = true; > > > > + smc->dr_cpu_enabled = true; > > > > fwc->get_dev_path = spapr_get_fw_dev_path; > > > > nc->nmi_monitor_handler = spapr_nmi; > > > > } > > > > @@ -2445,6 +2515,7 @@ static void > > > > spapr_machine_2_5_class_options(MachineClass *mc) > > > > > > > > spapr_machine_2_6_class_options(mc); > > > > smc->use_ohci_by_default = true; > > > > + smc->dr_cpu_enabled = false; > > > > SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_5); > > > > } > > > > > > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c > > > > index 9ddf3ce..4c233d7 100644 > > > > --- a/hw/ppc/spapr_cpu_core.c > > > > +++ b/hw/ppc/spapr_cpu_core.c > > > > @@ -14,6 +14,65 @@ > > > > #include "qapi/visitor.h" > > > > #include <sysemu/cpus.h> > > > > > > > > +void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > > > > + Error **errp) > > > > +{ > > > > + sPAPRMachineClass *smc = > > > > SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); > > > > + sPAPRMachineState *ms = SPAPR_MACHINE(qdev_get_machine()); > > > > + sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev)); > > > > + PowerPCCPU *cpu = &core->threads[0]; > > > > + CPUState *cs = CPU(cpu); > > > > + int id = ppc_get_vcpu_dt_id(cpu); > > > > + sPAPRDRConnector *drc = > > > > + spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, id); > > > > + sPAPRDRConnectorClass *drck; > > > > + Error *local_err = NULL; > > > > + void *fdt = NULL; > > > > + int fdt_offset = 0; > > > > + > > > > + if (!smc->dr_cpu_enabled) { > > > > 
+ /* > > > > + * This is a cold plugged CPU core but the machine doesn't > > > > support > > > > + * DR. So skip the hotplug path ensuring that the core is > > > > brought > > > > + * up online with out an associated DR connector. > > > > + */ > > > > + return; > > > > + } > > > > + > > > > + g_assert(drc); > > > > + > > > > + /* > > > > + * Setup CPU DT entries only for hotplugged CPUs. For boot time or > > > > + * coldplugged CPUs DT entries are setup in spapr_finalize_fdt(). > > > > + */ > > > > + if (dev->hotplugged) { > > > > + fdt = spapr_populate_hotplug_cpu_dt(dev, cs, &fdt_offset, ms); > > > > + dev->hotplugged = true; > > > > + } > > > > + > > > > + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); > > > > + drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, > > > > &local_err); > > > > + if (local_err) { > > > > + g_free(fdt); > > > > + error_propagate(errp, local_err); > > > > + return; > > > > + } > > > > + > > > > + if (dev->hotplugged) { > > > > + /* > > > > + * Send hotplug notification interrupt to the guest only in > > > > case > > > > + * of hotplugged CPUs. > > > > + */ > > > > + spapr_hotplug_req_add_by_index(drc); > > > > + } else { > > > > + /* > > > > + * Set the right DRC states for cold plugged CPU. > > > > + */ > > > > + drck->set_allocation_state(drc, > > > > SPAPR_DR_ALLOCATION_STATE_USABLE); > > > > + drck->set_isolation_state(drc, > > > > SPAPR_DR_ISOLATION_STATE_UNISOLATED); > > > > + } > > > > +} > > > > + > > > > static int spapr_cpu_core_realize_child(Object *child, void *opaque) > > > > { > > > > Error **errp = opaque; > > > > @@ -30,6 +89,7 @@ static int spapr_cpu_core_realize_child(Object > > > > *child, void *opaque) > > > > if (*errp) { > > > > return 1; > > > > } > > > > + spapr_cpu_reset(cpu); > > > > > > This also looks like it belongs in a different patch. > > > > You mean a separate patch for this or push this around to an existing > > patch of the series ? > > > > Regards, > > Bharata. > > >