On Tue, Sep 06, 2016 at 08:14:37AM +0200, Cédric Le Goater wrote: > On 09/05/2016 06:02 AM, David Gibson wrote: > > On Wed, Aug 31, 2016 at 06:34:13PM +0200, Cédric Le Goater wrote: > >> This is largy inspired by sPAPRCPUCore with some simplification, no > >> hotplug for instance. But the differences are small and the objects > >> could possibly be merged. > >> > >> A set of PnvCore objects is added to the PnvChip and the device > >> tree is populated looping on these cores. > >> > >> Real HW cpu ids are now generated depending on the chip cpu model, the > >> chip id and a core mask. This id is stored in CPUState->cpu_index and > >> in PnvCore->core_id and it is used to populate the device tree. > >> > >> Signed-off-by: Cédric Le Goater <c...@kaod.org> > >> --- > >> > >> Changes since v1: > >> > >> - changed name to PnvCore > >> - changed PnvChip core array type to a 'PnvCore *cores' > >> - introduced real cpu hw ids using a core mask from the chip > >> - reworked powernv_create_core_node() which populates the device tree > >> - added missing "ibm,pa-features" property > >> - smp_cpus representing threads, used smp_cores instead to create the > >> cores in the chip. > >> - removed the use of ppc_get_vcpu_dt_id() > >> - added "POWER8E" and "POWER8NVL" cpu models to exercice the > >> PnvChipClass > >> > >> hw/ppc/Makefile.objs | 2 +- > >> hw/ppc/pnv.c | 204 > >> ++++++++++++++++++++++++++++++++++++++++++++++ > >> hw/ppc/pnv_core.c | 170 ++++++++++++++++++++++++++++++++++++++ > >> include/hw/ppc/pnv.h | 7 ++ > >> include/hw/ppc/pnv_core.h | 47 +++++++++++ > >> 5 files changed, 429 insertions(+), 1 deletion(-) > >> create mode 100644 hw/ppc/pnv_core.c > >> create mode 100644 include/hw/ppc/pnv_core.h > >> > >> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs > >> index f580e5c41413..08c213c40684 100644 > >> --- a/hw/ppc/Makefile.objs > >> +++ b/hw/ppc/Makefile.objs > >> @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o > >> spapr_rtas.o > >> obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o > >> obj-$(CONFIG_PSERIES) += spapr_cpu_core.o > >> # IBM PowerNV > >> -obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o > >> +obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o > >> ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) > >> obj-y += spapr_pci_vfio.o > >> endif > >> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c > >> index b6efb5e3ef07..daf9f459ab0e 100644 > >> --- a/hw/ppc/pnv.c > >> +++ b/hw/ppc/pnv.c > >> @@ -35,6 +35,7 @@ > >> #include "hw/ppc/fdt.h" > >> #include "hw/ppc/ppc.h" > >> #include "hw/ppc/pnv.h" > >> +#include "hw/ppc/pnv_core.h" > >> #include "hw/loader.h" > >> #include "exec/address-spaces.h" > >> #include "qemu/cutils.h" > >> @@ -98,6 +99,136 @@ static int powernv_populate_memory(void *fdt) > >> return 0; > >> } > >> > >> +/* > >> + * The PowerNV cores (and threads) need to use real HW ids and not an > >> + * incremental index like it has been done on other platforms. This HW > >> + * id is called a PIR and is used in the device tree, in the XSCOM > >> + * communication to address cores, in the interrupt servers. > >> + */ > >> +static void powernv_create_core_node(PnvCore *pc, void *fdt, > >> + int cpus_offset, int chip_id) > >> +{ > >> + CPUCore *core = CPU_CORE(pc); > >> + CPUState *cs = CPU(DEVICE(pc->threads)); > >> + DeviceClass *dc = DEVICE_GET_CLASS(cs); > >> + PowerPCCPU *cpu = POWERPC_CPU(cs); > >> + int smt_threads = ppc_get_compat_smt_threads(cpu); > >> + CPUPPCState *env = &cpu->env; > >> + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); > >> + uint32_t servers_prop[smt_threads]; > >> + uint32_t gservers_prop[smt_threads * 2]; > >> + int i; > >> + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), > >> + 0xffffffff, 0xffffffff}; > >> + uint32_t tbfreq = PNV_TIMEBASE_FREQ; > >> + uint32_t cpufreq = 1000000000; > >> + uint32_t page_sizes_prop[64]; > >> + size_t page_sizes_prop_size; > >> + const uint8_t pa_features[] = { 24, 0, > >> + 0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0, > >> + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, > >> + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, > >> + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; > >> + int offset; > >> + char *nodename; > >> + > >> + nodename = g_strdup_printf("%s@%x", dc->fw_name, core->core_id); > >> + offset = fdt_add_subnode(fdt, cpus_offset, nodename); > >> + _FDT(offset); > >> + g_free(nodename); > >> + > >> + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip_id))); > >> + > >> + _FDT((fdt_setprop_cell(fdt, offset, "reg", core->core_id))); > >> + _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", core->core_id))); > >> + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); > >> + > >> + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", > >> env->spr[SPR_PVR]))); > >> + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", > >> + env->dcache_line_size))); > >> + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", > >> + env->dcache_line_size))); > >> + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", > >> + env->icache_line_size))); > >> + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", > >> + env->icache_line_size))); > >> + > >> + if (pcc->l1_dcache_size) { > >> + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", > >> + pcc->l1_dcache_size))); > >> + } else { > >> + error_report("Warning: Unknown L1 dcache size for cpu"); > >> + } > >> + if (pcc->l1_icache_size) { > >> + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", > >> + pcc->l1_icache_size))); > >> + } else { > >> + error_report("Warning: Unknown L1 icache size for cpu"); > >> + } > >> + > >> + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); > >> + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); > >> + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); > >> + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); > >> + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); > >> + > >> + if (env->spr_cb[SPR_PURR].oea_read) { > >> + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); > >> + } > >> + > >> + if (env->mmu_model & POWERPC_MMU_1TSEG) { > >> + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", > >> + segs, sizeof(segs)))); > >> + } > >> + > >> + /* Advertise VMX/VSX (vector extensions) if available > >> + * 0 / no property == no vector extensions > >> + * 1 == VMX / Altivec available > >> + * 2 == VSX available */ > >> + if (env->insns_flags & PPC_ALTIVEC) { > >> + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; > >> + > >> + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); > >> + } > >> + > >> + /* Advertise DFP (Decimal Floating Point) if available > >> + * 0 / no property == no DFP > >> + * 1 == DFP available */ > >> + if (env->insns_flags2 & PPC2_DFP) { > >> + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); > >> + } > >> + > >> + page_sizes_prop_size = ppc_create_page_sizes_prop(env, > >> page_sizes_prop, > >> + > >> sizeof(page_sizes_prop)); > >> + if (page_sizes_prop_size) { > >> + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", > >> + page_sizes_prop, page_sizes_prop_size))); > >> + } > >> + > >> + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", > >> + pa_features, sizeof(pa_features)))); > >> + > >> + if (cpu->cpu_version) { > >> + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", > >> cpu->cpu_version))); > >> + } > >> + > >> + /* Build interrupt servers and gservers properties */ > >> + for (i = 0; i < smt_threads; i++) { > >> + servers_prop[i] = cpu_to_be32(core->core_id + i); > >> + /* Hack, direct the group queues back to cpu 0 > >> + * > >> + * FIXME: check that we still need this hack with real HW > >> + * ids. Probably not. > >> + */ > >> + gservers_prop[i * 2] = cpu_to_be32(core->core_id + i); > >> + gservers_prop[i * 2 + 1] = 0; > > > > I'm not sure the group servers concept even makes sense in the case of > > powernv. In powernv, doesn't the guest control the "real" xics, > > including the link registers, and therefore can configure its own > > groups, rather than being limited to what firmware has set up as for > > PAPR? > > yes. we can remove this property. > > >> + } > >> + _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", > >> + servers_prop, sizeof(servers_prop)))); > >> + _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s", > >> + gservers_prop, sizeof(gservers_prop)))); > >> +} > >> + > >> static void *powernv_create_fdt(PnvMachineState *pnv, > >> const char *kernel_cmdline) > >> { > >> @@ -106,6 +237,7 @@ static void *powernv_create_fdt(PnvMachineState *pnv, > >> const char plat_compat[] = "qemu,powernv\0ibm,powernv"; > >> int off; > >> int i; > >> + int cpus_offset; > >> > >> fdt = g_malloc0(FDT_MAX_SIZE); > >> _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); > >> @@ -150,6 +282,22 @@ static void *powernv_create_fdt(PnvMachineState *pnv, > >> xscom_populate_fdt(pnv->chips[i]->xscom, fdt, 0); > >> } > >> > >> + /* cpus */ > >> + cpus_offset = fdt_add_subnode(fdt, 0, "cpus"); > >> + _FDT(cpus_offset); > >> + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1))); > >> + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0))); > >> + > >> + for (i = 0; i < pnv->num_chips; i++) { > >> + PnvChip *chip = pnv->chips[i]; > >> + int j; > >> + > >> + for (j = 0; j < chip->num_cores; j++) { > >> + powernv_create_core_node(&chip->cores[j], fdt, cpus_offset, > >> + chip->chip_id); > >> + } > >> + } > >> + > >> return fdt; > >> } > >> > >> @@ -230,6 +378,11 @@ static void ppc_powernv_init(MachineState *machine) > >> for (i = 0; i < pnv->num_chips; i++) { > >> Object *chip = object_new(chip_typename); > >> object_property_set_int(chip, CHIP_HWID(i), "chip-id", > >> &error_abort); > >> + object_property_set_int(chip, smp_cores, "num-cores", > >> &error_abort); > > > > This should probably be &error_fatal, again. > > ok. > > >> + /* > >> + * We could set a custom cores_mask for the chip here. > >> + */ > >> + > >> object_property_set_bool(chip, true, "realized", &error_abort); > >> pnv->chips[i] = PNV_CHIP(chip); > >> } > >> @@ -335,19 +488,70 @@ static const TypeInfo pnv_chip_power8e_info = { > >> .class_init = pnv_chip_power8e_class_init, > >> }; > >> > >> +/* > >> + * This is different for POWER9 so we might need a ops in the chip to > >> + * calculate the core pirs > >> + */ > >> +#define P8_PIR(chip_id, core_id) (((chip_id) << 7) | ((core_id) << 3)) > >> + > >> static void pnv_chip_realize(DeviceState *dev, Error **errp) > >> { > >> PnvChip *chip = PNV_CHIP(dev); > >> PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); > >> + char *typename = pnv_core_typename(pcc->cpu_model); > >> + size_t typesize = object_type_get_instance_size(typename); > >> + int i, core_hwid; > >> + > >> + if (!object_class_by_name(typename)) { > >> + error_setg(errp, "Unable to find PowerNV CPU Core '%s'", > >> typename); > >> + return; > >> + } > >> > >> /* Set up XSCOM bus */ > >> chip->xscom = xscom_create(chip); > >> > >> + if (chip->num_cores > pcc->cores_max) { > >> + qemu_log_mask(LOG_GUEST_ERROR, "%s: too many cores for chip ! " > >> + "Limiting to %d\n", __func__, pcc->cores_max); > >> + chip->num_cores = pcc->cores_max; > >> + } > >> + > >> + chip->cores = g_new0(PnvCore, chip->num_cores); > >> + > >> + /* no custom mask for this chip, let's use the default one from > >> + * the chip class */ > >> + if (!chip->cores_mask) { > >> + chip->cores_mask = pcc->cores_mask; > >> + } > >> + > >> + for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8) > >> + && (i < chip->num_cores); core_hwid++) { > >> + PnvCore *pnv_core = &chip->cores[i]; > > > > > > Unfortunately, as with spapr core creating its threads you'll need > > some fancier pointer manipulation to handle the possibility of > > subtypes of PnvCore with a different instance size. > > That doesn't happen now, but it can in theory. > > Yes. This is a reason why I preferred to have a Object **cores. I will > fix that. > > I don't really like that : > > void *obj = chip->cores + i * size; > > It does not feel "object-oriented".
Yeah, it's pretty clunky, but it's what we have for now. > >> + > >> + if (!(chip->cores_mask & (1 << core_hwid))) { > >> + continue; > >> + } > >> + > >> + object_initialize(pnv_core, typesize, typename); > >> + object_property_set_int(OBJECT(pnv_core), smp_threads, > >> "nr-threads", > >> + &error_fatal); > >> + object_property_set_int(OBJECT(pnv_core), > >> + P8_PIR(chip->chip_id, core_hwid), > >> + CPU_CORE_PROP_CORE_ID, &error_fatal); > >> + object_property_set_bool(OBJECT(pnv_core), true, "realized", > >> + &error_fatal); > >> + object_unref(OBJECT(pnv_core)); > >> + i++; > >> + } > >> + g_free(typename); > >> + > >> pcc->realize(chip, errp); > >> } > >> > >> static Property pnv_chip_properties[] = { > >> DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0), > >> + DEFINE_PROP_UINT32("num-cores", PnvChip, num_cores, 1), > > > > I suggest renaming this to "nr-cores" to match "nr-threads" inside the > > core object. > > ok. fine for me. > > >> + DEFINE_PROP_UINT32("cores-mask", PnvChip, cores_mask, 0x0), > >> DEFINE_PROP_END_OF_LIST(), > >> }; > >> > >> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c > >> new file mode 100644 > >> index 000000000000..825aea1194a1 > >> --- /dev/null > >> +++ b/hw/ppc/pnv_core.c > >> @@ -0,0 +1,170 @@ > >> +/* > >> + * QEMU PowerPC PowerNV CPU Core model > >> + * > >> + * Copyright (c) IBM Corporation. > >> + * > >> + * This library is free software; you can redistribute it and/or > >> + * modify it under the terms of the GNU Lesser General Public License > >> + * as published by the Free Software Foundation; either version 2 of > >> + * the License, or (at your option) any later version. > >> + * > >> + * This library is distributed in the hope that it will be useful, but > >> + * WITHOUT ANY WARRANTY; without even the implied warranty of > >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > >> + * Lesser General Public License for more details. > >> + * > >> + * You should have received a copy of the GNU Lesser General Public > >> + * License along with this library; if not, see > >> <http://www.gnu.org/licenses/>. > >> + */ > >> +#include "qemu/osdep.h" > >> +#include "sysemu/sysemu.h" > >> +#include "qapi/error.h" > >> +#include "target-ppc/cpu.h" > >> +#include "hw/ppc/ppc.h" > >> +#include "hw/ppc/pnv.h" > >> +#include "hw/ppc/pnv_core.h" > >> + > >> +static void powernv_cpu_reset(void *opaque) > >> +{ > >> + PowerPCCPU *cpu = opaque; > >> + CPUState *cs = CPU(cpu); > >> + CPUPPCState *env = &cpu->env; > >> + MachineState *machine = MACHINE(qdev_get_machine()); > >> + PnvMachineState *pnv = POWERNV_MACHINE(machine); > >> + > >> + cpu_reset(cs); > >> + > >> + env->spr[SPR_PIR] = cs->cpu_index; > > > > This can't work. Your PIR values aren't contiguous, but cpu_index > > values must be (until you get hotplug). > > cpu_index are not contiguous indeed. They are assigned in pnv_core_realize() > > cs->cpu_index = cc->core_id + i; > > For this to work, we need the four xics patches Nikunj is working > on plus some helper routines to assign and find an icp depending on > cs and not an index anymore. That's not the only problem with cpu_index being non-contiguous. Maybe we'll get to a point where that's possible but for the forseeable future, the cpu_index will need to be contiguous (as long as all possible cpus are present, anyway). For now you should, if possible, derive the non-contiguous platform ids from the contiguous cpu_index when you need them. > I will revive the thread with an extra patch. > > >> + env->spr[SPR_HIOR] = 0; > >> + env->gpr[3] = pnv->fdt_addr; > > > > Is the fdt address injected for all CPUs, or only the boot CPU? > > skiboot will just pick just any boot cpu. My understanding is that > all need to have the flat device tree address in r3. Ok. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature