pnv: add a PnvCore object

David Gibson Tue, 06 Sep 2016 19:18:07 -0700

On Tue, Sep 06, 2016 at 08:14:37AM +0200, Cédric Le Goater wrote:
> On 09/05/2016 06:02 AM, David Gibson wrote:
> > On Wed, Aug 31, 2016 at 06:34:13PM +0200, Cédric Le Goater wrote:
> >> This is largy inspired by sPAPRCPUCore with some simplification, no
> >> hotplug for instance. But the differences are small and the objects
> >> could possibly be merged.
> >>
> >> A set of PnvCore objects is added to the PnvChip and the device
> >> tree is populated looping on these cores.
> >>
> >> Real HW cpu ids are now generated depending on the chip cpu model, the
> >> chip id and a core mask. This id is stored in CPUState->cpu_index and
> >> in PnvCore->core_id and it is used to populate the device tree.
> >>
> >> Signed-off-by: Cédric Le Goater <c...@kaod.org>
> >> ---
> >>
> >>  Changes since v1:
> >>
> >>  - changed name to PnvCore
> >>  - changed PnvChip core array type to a 'PnvCore *cores'
> >>  - introduced real cpu hw ids using a core mask from the chip
> >>  - reworked powernv_create_core_node() which populates the device tree
> >>  - added missing "ibm,pa-features" property 
> >>  - smp_cpus representing threads, used smp_cores instead to create the
> >>    cores in the chip.
> >>  - removed the use of ppc_get_vcpu_dt_id() 
> >>  - added "POWER8E" and "POWER8NVL" cpu models to exercice the
> >>    PnvChipClass
> >>
> >>  hw/ppc/Makefile.objs      |   2 +-
> >>  hw/ppc/pnv.c              | 204 
> >> ++++++++++++++++++++++++++++++++++++++++++++++
> >>  hw/ppc/pnv_core.c         | 170 ++++++++++++++++++++++++++++++++++++++
> >>  include/hw/ppc/pnv.h      |   7 ++
> >>  include/hw/ppc/pnv_core.h |  47 +++++++++++
> >>  5 files changed, 429 insertions(+), 1 deletion(-)
> >>  create mode 100644 hw/ppc/pnv_core.c
> >>  create mode 100644 include/hw/ppc/pnv_core.h
> >>
> >> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> >> index f580e5c41413..08c213c40684 100644
> >> --- a/hw/ppc/Makefile.objs
> >> +++ b/hw/ppc/Makefile.objs
> >> @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o 
> >> spapr_rtas.o
> >>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
> >>  obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
> >>  # IBM PowerNV
> >> -obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o
> >> +obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o
> >>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
> >>  obj-y += spapr_pci_vfio.o
> >>  endif
> >> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> >> index b6efb5e3ef07..daf9f459ab0e 100644
> >> --- a/hw/ppc/pnv.c
> >> +++ b/hw/ppc/pnv.c
> >> @@ -35,6 +35,7 @@
> >>  #include "hw/ppc/fdt.h"
> >>  #include "hw/ppc/ppc.h"
> >>  #include "hw/ppc/pnv.h"
> >> +#include "hw/ppc/pnv_core.h"
> >>  #include "hw/loader.h"
> >>  #include "exec/address-spaces.h"
> >>  #include "qemu/cutils.h"
> >> @@ -98,6 +99,136 @@ static int powernv_populate_memory(void *fdt)
> >>      return 0;
> >>  }
> >>  
> >> +/*
> >> + * The PowerNV cores (and threads) need to use real HW ids and not an
> >> + * incremental index like it has been done on other platforms. This HW
> >> + * id is called a PIR and is used in the device tree, in the XSCOM
> >> + * communication to address cores, in the interrupt servers.
> >> + */
> >> +static void powernv_create_core_node(PnvCore *pc, void *fdt,
> >> +                                     int cpus_offset, int chip_id)
> >> +{
> >> +    CPUCore *core = CPU_CORE(pc);
> >> +    CPUState *cs = CPU(DEVICE(pc->threads));
> >> +    DeviceClass *dc = DEVICE_GET_CLASS(cs);
> >> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> >> +    int smt_threads = ppc_get_compat_smt_threads(cpu);
> >> +    CPUPPCState *env = &cpu->env;
> >> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> >> +    uint32_t servers_prop[smt_threads];
> >> +    uint32_t gservers_prop[smt_threads * 2];
> >> +    int i;
> >> +    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> >> +                       0xffffffff, 0xffffffff};
> >> +    uint32_t tbfreq = PNV_TIMEBASE_FREQ;
> >> +    uint32_t cpufreq = 1000000000;
> >> +    uint32_t page_sizes_prop[64];
> >> +    size_t page_sizes_prop_size;
> >> +    const uint8_t pa_features[] = { 24, 0,
> >> +                                    0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0,
> >> +                                    0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
> >> +                                    0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
> >> +                                    0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
> >> +    int offset;
> >> +    char *nodename;
> >> +
> >> +    nodename = g_strdup_printf("%s@%x", dc->fw_name, core->core_id);
> >> +    offset = fdt_add_subnode(fdt, cpus_offset, nodename);
> >> +    _FDT(offset);
> >> +    g_free(nodename);
> >> +
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip_id)));
> >> +
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "reg", core->core_id)));
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", core->core_id)));
> >> +    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
> >> +
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", 
> >> env->spr[SPR_PVR])));
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
> >> +                            env->dcache_line_size)));
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
> >> +                            env->dcache_line_size)));
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
> >> +                            env->icache_line_size)));
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
> >> +                            env->icache_line_size)));
> >> +
> >> +    if (pcc->l1_dcache_size) {
> >> +        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
> >> +                               pcc->l1_dcache_size)));
> >> +    } else {
> >> +        error_report("Warning: Unknown L1 dcache size for cpu");
> >> +    }
> >> +    if (pcc->l1_icache_size) {
> >> +        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
> >> +                               pcc->l1_icache_size)));
> >> +    } else {
> >> +        error_report("Warning: Unknown L1 icache size for cpu");
> >> +    }
> >> +
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
> >> +    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
> >> +    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
> >> +    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
> >> +
> >> +    if (env->spr_cb[SPR_PURR].oea_read) {
> >> +        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
> >> +    }
> >> +
> >> +    if (env->mmu_model & POWERPC_MMU_1TSEG) {
> >> +        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
> >> +                           segs, sizeof(segs))));
> >> +    }
> >> +
> >> +    /* Advertise VMX/VSX (vector extensions) if available
> >> +     *   0 / no property == no vector extensions
> >> +     *   1               == VMX / Altivec available
> >> +     *   2               == VSX available */
> >> +    if (env->insns_flags & PPC_ALTIVEC) {
> >> +        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
> >> +
> >> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
> >> +    }
> >> +
> >> +    /* Advertise DFP (Decimal Floating Point) if available
> >> +     *   0 / no property == no DFP
> >> +     *   1               == DFP available */
> >> +    if (env->insns_flags2 & PPC2_DFP) {
> >> +        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
> >> +    }
> >> +
> >> +    page_sizes_prop_size = ppc_create_page_sizes_prop(env, 
> >> page_sizes_prop,
> >> +                                                  
> >> sizeof(page_sizes_prop));
> >> +    if (page_sizes_prop_size) {
> >> +        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
> >> +                           page_sizes_prop, page_sizes_prop_size)));
> >> +    }
> >> +
> >> +    _FDT((fdt_setprop(fdt, offset, "ibm,pa-features",
> >> +                       pa_features, sizeof(pa_features))));
> >> +
> >> +    if (cpu->cpu_version) {
> >> +        _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", 
> >> cpu->cpu_version)));
> >> +    }
> >> +
> >> +    /* Build interrupt servers and gservers properties */
> >> +    for (i = 0; i < smt_threads; i++) {
> >> +        servers_prop[i] = cpu_to_be32(core->core_id + i);
> >> +        /* Hack, direct the group queues back to cpu 0
> >> +         *
> >> +         * FIXME: check that we still need this hack with real HW
> >> +         * ids. Probably not.
> >> +         */
> >> +        gservers_prop[i * 2] = cpu_to_be32(core->core_id + i);
> >> +        gservers_prop[i * 2 + 1] = 0;
> > 
> > I'm not sure the group servers concept even makes sense in the case of
> > powernv.  In powernv, doesn't the guest control the "real" xics,
> > including the link registers, and therefore can configure its own
> > groups, rather than being limited to what firmware has set up as for
> > PAPR?
> 
> yes. we can remove this property.  
>  
> >> +    }
> >> +    _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
> >> +                       servers_prop, sizeof(servers_prop))));
> >> +    _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
> >> +                       gservers_prop, sizeof(gservers_prop))));
> >> +}
> >> +
> >>  static void *powernv_create_fdt(PnvMachineState *pnv,
> >>                                  const char *kernel_cmdline)
> >>  {
> >> @@ -106,6 +237,7 @@ static void *powernv_create_fdt(PnvMachineState *pnv,
> >>      const char plat_compat[] = "qemu,powernv\0ibm,powernv";
> >>      int off;
> >>      int i;
> >> +    int cpus_offset;
> >>  
> >>      fdt = g_malloc0(FDT_MAX_SIZE);
> >>      _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
> >> @@ -150,6 +282,22 @@ static void *powernv_create_fdt(PnvMachineState *pnv,
> >>          xscom_populate_fdt(pnv->chips[i]->xscom, fdt, 0);
> >>      }
> >>  
> >> +    /* cpus */
> >> +    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
> >> +    _FDT(cpus_offset);
> >> +    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
> >> +    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
> >> +
> >> +    for (i = 0; i < pnv->num_chips; i++) {
> >> +        PnvChip *chip = pnv->chips[i];
> >> +        int j;
> >> +
> >> +        for (j = 0; j < chip->num_cores; j++) {
> >> +            powernv_create_core_node(&chip->cores[j], fdt, cpus_offset,
> >> +                                     chip->chip_id);
> >> +        }
> >> +    }
> >> +
> >>      return fdt;
> >>  }
> >>  
> >> @@ -230,6 +378,11 @@ static void ppc_powernv_init(MachineState *machine)
> >>      for (i = 0; i < pnv->num_chips; i++) {
> >>          Object *chip = object_new(chip_typename);
> >>          object_property_set_int(chip, CHIP_HWID(i), "chip-id", 
> >> &error_abort);
> >> +        object_property_set_int(chip, smp_cores, "num-cores", 
> >> &error_abort);
> > 
> > This should probably be &error_fatal, again.
> 
> ok.
> 
> >> +        /*
> >> +         * We could set a custom cores_mask for the chip here.
> >> +         */
> >> +
> >>          object_property_set_bool(chip, true, "realized", &error_abort);
> >>          pnv->chips[i] = PNV_CHIP(chip);
> >>      }
> >> @@ -335,19 +488,70 @@ static const TypeInfo pnv_chip_power8e_info = {
> >>      .class_init    = pnv_chip_power8e_class_init,
> >>  };
> >>  
> >> +/*
> >> + * This is different for POWER9 so we might need a ops in the chip to
> >> + * calculate the core pirs
> >> + */
> >> +#define P8_PIR(chip_id, core_id) (((chip_id) << 7) | ((core_id) << 3))
> >> +
> >>  static void pnv_chip_realize(DeviceState *dev, Error **errp)
> >>  {
> >>      PnvChip *chip = PNV_CHIP(dev);
> >>      PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
> >> +    char *typename = pnv_core_typename(pcc->cpu_model);
> >> +    size_t typesize = object_type_get_instance_size(typename);
> >> +    int i, core_hwid;
> >> +
> >> +    if (!object_class_by_name(typename)) {
> >> +        error_setg(errp, "Unable to find PowerNV CPU Core '%s'", 
> >> typename);
> >> +        return;
> >> +    }
> >>  
> >>      /* Set up XSCOM bus */
> >>      chip->xscom = xscom_create(chip);
> >>  
> >> +    if (chip->num_cores > pcc->cores_max) {
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "%s: too many cores for chip ! "
> >> +                      "Limiting to %d\n", __func__, pcc->cores_max);
> >> +        chip->num_cores = pcc->cores_max;
> >> +    }
> >> +
> >> +    chip->cores = g_new0(PnvCore, chip->num_cores);
> >> +
> >> +    /* no custom mask for this chip, let's use the default one from
> >> +     * the chip class */
> >> +    if (!chip->cores_mask) {
> >> +        chip->cores_mask = pcc->cores_mask;
> >> +    }
> >> +
> >> +    for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8)
> >> +             && (i < chip->num_cores); core_hwid++) {
> >> +        PnvCore *pnv_core = &chip->cores[i];
> > 
> > 
> > Unfortunately, as with spapr core creating its threads you'll need
> > some fancier pointer manipulation to handle the possibility of
> > subtypes of PnvCore with a different instance size.  
> > That doesn't happen now, but it can in theory.
> 
> Yes. This is a reason why I preferred to have a Object **cores. I will 
> fix that.
> 
> I don't really like that :
> 
>       void *obj = chip->cores + i * size;
> 
> It does not feel "object-oriented".


Yeah, it's pretty clunky, but it's what we have for now.

> >> +
> >> +        if (!(chip->cores_mask & (1 << core_hwid))) {
> >> +            continue;
> >> +        }
> >> +
> >> +        object_initialize(pnv_core, typesize, typename);
> >> +        object_property_set_int(OBJECT(pnv_core), smp_threads, 
> >> "nr-threads",
> >> +                                &error_fatal);
> >> +        object_property_set_int(OBJECT(pnv_core),
> >> +                                P8_PIR(chip->chip_id, core_hwid),
> >> +                                CPU_CORE_PROP_CORE_ID, &error_fatal);
> >> +        object_property_set_bool(OBJECT(pnv_core), true, "realized",
> >> +                                 &error_fatal);
> >> +        object_unref(OBJECT(pnv_core));
> >> +        i++;
> >> +    }
> >> +    g_free(typename);
> >> +
> >>      pcc->realize(chip, errp);
> >>  }
> >>  
> >>  static Property pnv_chip_properties[] = {
> >>      DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
> >> +    DEFINE_PROP_UINT32("num-cores", PnvChip, num_cores, 1),
> > 
> > I suggest renaming this to "nr-cores" to match "nr-threads" inside the
> > core object.
> 
> ok. fine for me. 
> 
> >> +    DEFINE_PROP_UINT32("cores-mask", PnvChip, cores_mask, 0x0),
> >>      DEFINE_PROP_END_OF_LIST(),
> >>  };
> >>  
> >> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> >> new file mode 100644
> >> index 000000000000..825aea1194a1
> >> --- /dev/null
> >> +++ b/hw/ppc/pnv_core.c
> >> @@ -0,0 +1,170 @@
> >> +/*
> >> + * QEMU PowerPC PowerNV CPU Core model
> >> + *
> >> + * Copyright (c) IBM Corporation.
> >> + *
> >> + * This library is free software; you can redistribute it and/or
> >> + * modify it under the terms of the GNU Lesser General Public License
> >> + * as published by the Free Software Foundation; either version 2 of
> >> + * the License, or (at your option) any later version.
> >> + *
> >> + * This library is distributed in the hope that it will be useful, but
> >> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >> + * Lesser General Public License for more details.
> >> + *
> >> + * You should have received a copy of the GNU Lesser General Public
> >> + * License along with this library; if not, see 
> >> <http://www.gnu.org/licenses/>.
> >> + */
> >> +#include "qemu/osdep.h"
> >> +#include "sysemu/sysemu.h"
> >> +#include "qapi/error.h"
> >> +#include "target-ppc/cpu.h"
> >> +#include "hw/ppc/ppc.h"
> >> +#include "hw/ppc/pnv.h"
> >> +#include "hw/ppc/pnv_core.h"
> >> +
> >> +static void powernv_cpu_reset(void *opaque)
> >> +{
> >> +    PowerPCCPU *cpu = opaque;
> >> +    CPUState *cs = CPU(cpu);
> >> +    CPUPPCState *env = &cpu->env;
> >> +    MachineState *machine = MACHINE(qdev_get_machine());
> >> +    PnvMachineState *pnv = POWERNV_MACHINE(machine);
> >> +
> >> +    cpu_reset(cs);
> >> +
> >> +    env->spr[SPR_PIR] = cs->cpu_index;
> > 
> > This can't work.  Your PIR values aren't contiguous, but cpu_index
> > values must be (until you get hotplug).
> 
> cpu_index are not contiguous indeed. They are assigned in pnv_core_realize()
> 
>         cs->cpu_index = cc->core_id + i;
> 
> For this to work, we need the four xics patches Nikunj is working 
> on plus some helper routines to assign and find an icp depending on 
> cs and not an index anymore.

That's not the only problem with cpu_index being non-contiguous.
Maybe we'll get to a point where that's possible but for the
forseeable future, the cpu_index will need to be contiguous (as long
as all possible cpus are present, anyway).

For now you should, if possible, derive the non-contiguous platform
ids from the contiguous cpu_index when you need them.

> I will revive the thread with an extra patch.
> 
> >> +    env->spr[SPR_HIOR] = 0;
> >> +    env->gpr[3] = pnv->fdt_addr;
> > 
> > Is the fdt address injected for all CPUs, or only the boot CPU?
> 
> skiboot will just pick just any boot cpu. My understanding is that 
> all need to have the flat device tree address in r3.

Ok.

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v2 5/7] ppc/pnv: add a PnvCore object

Reply via email to