On 10/25/2016 07:08 AM, David Gibson wrote: > On Sat, Oct 22, 2016 at 11:46:44AM +0200, Cédric Le Goater wrote: >> This provides access to the MMIO based Interrupt Presentation >> Controllers (ICP) as found on a POWER8 system. >> >> A new XICSNative class is introduced to hold the MMIO region of the >> ICPs. Each thread of the system has a subregion, indexed by its PIR >> number, holding a XIVE (External Interrupt Vector Entry). This >> provides a mean to make the link with the ICPState of the CPU. >> >> Signed-off-by: Cédric Le Goater <c...@kaod.org> >> --- >> >> Changes since v4: >> >> - replaced the pir_table by memory subregions using an ICP. >> - removed the find_icp() and cpu_setup() handlers which became >> useless with the memory regions. >> - removed the superfluous inits done in xics_native_initfn. This is >> covered in the parent class init. >> - took ownership of the patch. >> >> default-configs/ppc64-softmmu.mak | 3 +- >> hw/intc/Makefile.objs | 1 + >> hw/intc/xics_native.c | 304 >> ++++++++++++++++++++++++++++++++++++++ >> include/hw/ppc/pnv.h | 19 +++ >> include/hw/ppc/xics.h | 24 +++ >> 5 files changed, 350 insertions(+), 1 deletion(-) >> create mode 100644 hw/intc/xics_native.c >> >> diff --git a/default-configs/ppc64-softmmu.mak >> b/default-configs/ppc64-softmmu.mak >> index 67a9bcaa67fa..a22c93a48686 100644 >> --- a/default-configs/ppc64-softmmu.mak >> +++ b/default-configs/ppc64-softmmu.mak >> @@ -48,8 +48,9 @@ CONFIG_PLATFORM_BUS=y >> CONFIG_ETSEC=y >> CONFIG_LIBDECNUMBER=y >> # For pSeries >> -CONFIG_XICS=$(CONFIG_PSERIES) >> +CONFIG_XICS=$(or $(CONFIG_PSERIES),$(CONFIG_POWERNV)) >> CONFIG_XICS_SPAPR=$(CONFIG_PSERIES) >> +CONFIG_XICS_NATIVE=$(CONFIG_POWERNV) >> CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM)) >> # For PReP >> CONFIG_MC146818RTC=y >> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs >> index 2f44a2da26e9..e44a29d75b32 100644 >> --- a/hw/intc/Makefile.objs >> +++ b/hw/intc/Makefile.objs >> @@ -34,6 +34,7 @@ obj-$(CONFIG_RASPI) += bcm2835_ic.o bcm2836_control.o >> obj-$(CONFIG_SH4) += sh_intc.o >> obj-$(CONFIG_XICS) += xics.o >> obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o >> +obj-$(CONFIG_XICS_NATIVE) += xics_native.o >> obj-$(CONFIG_XICS_KVM) += xics_kvm.o >> obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o >> obj-$(CONFIG_S390_FLIC) += s390_flic.o >> diff --git a/hw/intc/xics_native.c b/hw/intc/xics_native.c >> new file mode 100644 >> index 000000000000..bbdd786aeb50 >> --- /dev/null >> +++ b/hw/intc/xics_native.c >> @@ -0,0 +1,304 @@ >> +/* >> + * QEMU PowerPC PowerNV machine model >> + * >> + * Native version of ICS/ICP >> + * >> + * Copyright (c) 2016, IBM Corporation. >> + * >> + * This library is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU Lesser General Public >> + * License as published by the Free Software Foundation; either >> + * version 2 of the License, or (at your option) any later version. >> + * >> + * This library is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + * Lesser General Public License for more details. >> + * >> + * You should have received a copy of the GNU Lesser General Public >> + * License along with this library; if not, see >> <http://www.gnu.org/licenses/>. >> + */ >> + >> +#include "qemu/osdep.h" >> +#include "qapi/error.h" >> +#include "qemu-common.h" >> +#include "cpu.h" >> +#include "hw/hw.h" >> +#include "qemu/log.h" >> +#include "qapi/error.h" >> + >> +#include "hw/ppc/fdt.h" >> +#include "hw/ppc/xics.h" >> +#include "hw/ppc/pnv.h" >> + >> +#include <libfdt.h> >> + >> +static void xics_native_reset(void *opaque) >> +{ >> + device_reset(DEVICE(opaque)); >> +} >> + >> +static void xics_native_initfn(Object *obj) >> +{ >> + qemu_register_reset(xics_native_reset, obj); >> +} > > I think we need to investigate why the xics native is not showing up > on the SysBus. As a "raw" MMIO device, it really should.
Well, it has sysbus mmio region, but it is not created with qdev_create(...) so it is not under sysbus and the reset does not get called. That is my understanding of the problem. May be we shouldn't be using a sysbus mmio region ? > If it was, device_reset should be called without these shenannigans. yes. >> + >> +static uint64_t xics_native_read(void *opaque, hwaddr addr, unsigned width) >> +{ >> + ICPState *icp = opaque; >> + bool byte0 = (width == 1 && (addr & 0x3) == 0); >> + uint64_t val = 0xffffffff; >> + >> + switch (addr & 0xffc) { >> + case 0: /* poll */ >> + val = icp_ipoll(icp, NULL); >> + if (byte0) { >> + val >>= 24; >> + } else if (width != 4) { >> + goto bad_access; >> + } >> + break; >> + case 4: /* xirr */ >> + if (byte0) { >> + val = icp_ipoll(icp, NULL) >> 24; >> + } else if (width == 4) { >> + val = icp_accept(icp); >> + } else { >> + goto bad_access; >> + } >> + break; >> + case 12: >> + if (byte0) { >> + val = icp->mfrr; >> + } else { >> + goto bad_access; >> + } >> + break; >> + case 16: >> + if (width == 4) { >> + val = icp->links[0]; >> + } else { >> + goto bad_access; >> + } >> + break; >> + case 20: >> + if (width == 4) { >> + val = icp->links[1]; >> + } else { >> + goto bad_access; >> + } >> + break; >> + case 24: >> + if (width == 4) { >> + val = icp->links[2]; >> + } else { >> + goto bad_access; >> + } >> + break; >> + default: >> +bad_access: >> + qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%" >> + HWADDR_PRIx"/%d\n", addr, width); >> + } >> + >> + return val; >> +} >> + >> +static void xics_native_write(void *opaque, hwaddr addr, uint64_t val, >> + unsigned width) >> +{ >> + ICPState *icp = opaque; >> + bool byte0 = (width == 1 && (addr & 0x3) == 0); >> + >> + switch (addr & 0xffc) { >> + case 4: /* xirr */ >> + if (byte0) { >> + icp_set_cppr(icp, val); >> + } else if (width == 4) { >> + icp_eoi(icp, val); >> + } else { >> + goto bad_access; >> + } >> + break; >> + case 12: >> + if (byte0) { >> + icp_set_mfrr(icp, val); >> + } else { >> + goto bad_access; >> + } >> + break; >> + case 16: >> + if (width == 4) { >> + icp->links[0] = val; >> + } else { >> + goto bad_access; >> + } >> + break; >> + case 20: >> + if (width == 4) { >> + icp->links[1] = val; >> + } else { >> + goto bad_access; >> + } >> + break; >> + case 24: >> + if (width == 4) { >> + icp->links[2] = val; >> + } else { >> + goto bad_access; >> + } >> + break; >> + default: >> +bad_access: >> + qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%" >> + HWADDR_PRIx"/%d\n", addr, width); >> + } >> +} >> + >> +static const MemoryRegionOps xics_native_ops = { >> + .read = xics_native_read, >> + .write = xics_native_write, >> + .valid.min_access_size = 1, >> + .valid.max_access_size = 4, >> + .impl.min_access_size = 1, >> + .impl.max_access_size = 4, >> + .endianness = DEVICE_BIG_ENDIAN, >> +}; >> + >> +static uint64_t xics_native_default_read(void *opaque, hwaddr addr, >> + unsigned width) >> +{ >> + qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%" >> + HWADDR_PRIx"/%d\n", addr, width); >> + return 0xffffffffffffffffull; >> +} >> + >> +static void xics_native_default_write(void *opaque, hwaddr addr, uint64_t >> val, >> + unsigned width) >> +{ >> + qemu_log_mask(LOG_GUEST_ERROR, "XICS: Bad ICP access 0x%" >> + HWADDR_PRIx"/%d\n", addr, width); >> +} >> + >> +static const MemoryRegionOps xics_native_default_ops = { >> + .read = xics_native_default_read, >> + .write = xics_native_default_write, >> + .valid.min_access_size = 1, >> + .valid.max_access_size = 4, >> + .impl.min_access_size = 1, >> + .impl.max_access_size = 4, >> + .endianness = DEVICE_BIG_ENDIAN, >> +}; >> + >> +static void xics_native_set_nr_servers(XICSState *xics, uint32_t nr_servers, >> + Error **errp) >> +{ >> + xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp); >> +} >> + >> +static void xics_native_realize(DeviceState *dev, Error **errp) >> +{ >> + XICSState *xics = XICS_COMMON(dev); >> + XICSNative *xicsn = XICS_NATIVE(dev); >> + Error *error = NULL; >> + int i; >> + >> + if (!xics->nr_servers) { >> + error_setg(errp, "Number of servers needs to be greater than 0"); >> + return; >> + } >> + >> + for (i = 0; i < xics->nr_servers; i++) { >> + object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized", >> + &error); >> + if (error) { >> + error_propagate(errp, error); >> + return; >> + } >> + } >> + >> + /* >> + * Initialize the container region for the ICPs and the subregion >> + * for each cpu. The mmapping will be done at the board level >> + * depending on the pir of the core. >> + * >> + * TODO: build a name with the chip id >> + */ >> + memory_region_init_io(&xicsn->icp_mmio, OBJECT(dev), >> + &xics_native_default_ops, xicsn, "icp-0", >> + PNV_XICS_SIZE); > > I don't think you should need these native ops. I believe you can > construct a memory region as a "pure" container, then just put the > populated regions inside it. It is a way to track bogus read/writes the guest shouldn't be doing but it is not that important. I can remove it. Thanks C. >> + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xicsn->icp_mmio); >> + >> + xicsn->icp_mmios = g_new0(MemoryRegion, xics->nr_servers); >> + for (i = 0; i < xics->nr_servers; i++) { >> + ICPState *icp = &xics->ss[i]; >> + memory_region_init_io(&xicsn->icp_mmios[i], OBJECT(dev), >> + &xics_native_ops, icp, "icp-0", 0x1000); >> + } >> +} >> + >> +static void xics_native_class_init(ObjectClass *oc, void *data) >> +{ >> + DeviceClass *dc = DEVICE_CLASS(oc); >> + XICSStateClass *xsc = XICS_NATIVE_CLASS(oc); >> + >> + dc->realize = xics_native_realize; >> + xsc->set_nr_servers = xics_native_set_nr_servers; >> +} >> + >> +static const TypeInfo xics_native_info = { >> + .name = TYPE_XICS_NATIVE, >> + .parent = TYPE_XICS_COMMON, >> + .instance_size = sizeof(XICSNative), >> + .class_size = sizeof(XICSStateClass), >> + .class_init = xics_native_class_init, >> + .instance_init = xics_native_initfn, >> +}; >> + >> +static void xics_native_register_types(void) >> +{ >> + type_register_static(&xics_native_info); >> +} >> + >> +type_init(xics_native_register_types) >> + >> +void xics_native_populate_icp(PnvChip *chip, void *fdt, int offset, >> + uint32_t pir, uint32_t count) >> +{ >> + uint64_t addr; >> + char *name; >> + const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp"; >> + uint32_t irange[2], i, rsize; >> + uint64_t *reg; >> + >> + /* >> + * TODO: add multichip ICP BAR >> + */ >> + addr = PNV_XICS_BASE | (pir << 12); >> + >> + irange[0] = cpu_to_be32(pir); >> + irange[1] = cpu_to_be32(count); >> + >> + rsize = sizeof(uint64_t) * 2 * count; >> + reg = g_malloc(rsize); >> + for (i = 0; i < count; i++) { >> + reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000)); >> + reg[i * 2 + 1] = cpu_to_be64(0x1000); >> + } >> + >> + name = g_strdup_printf("interrupt-controller@%"PRIX64, addr); >> + offset = fdt_add_subnode(fdt, offset, name); >> + _FDT(offset); >> + g_free(name); >> + >> + _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat)))); >> + _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize))); >> + _FDT((fdt_setprop_string(fdt, offset, "device_type", >> + "PowerPC-External-Interrupt-Presentation"))); >> + _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0))); >> + _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges", >> + irange, sizeof(irange)))); >> + _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1))); >> + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0))); >> + g_free(reg); >> +} >> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h >> index 02ac1c5f428e..c08ed1c72b17 100644 >> --- a/include/hw/ppc/pnv.h >> +++ b/include/hw/ppc/pnv.h >> @@ -126,4 +126,23 @@ typedef struct PnvMachineState { >> #define PNV_XSCOM_BASE(chip) \ >> (chip->xscom_base + ((uint64_t)(chip)->chip_id) * PNV_XSCOM_SIZE) >> >> +/* >> + * XSCOM 0x20109CA defines the ICP BAR: >> + * >> + * 0:29 : bits 14 to 43 of address to define 1 MB region. >> + * 30 : 1 to enable ICP to receive loads/stores against its BAR region >> + * 31:63 : Constant 0 >> + * >> + * Usually defined as : >> + * >> + * 0xffffe00200000000 -> 0x0003ffff80000000 >> + * 0xffffe00600000000 -> 0x0003ffff80100000 >> + * 0xffffe02200000000 -> 0x0003ffff80800000 >> + * 0xffffe02600000000 -> 0x0003ffff80900000 >> + * >> + * TODO: make a macro using the chip hw id >> + */ >> +#define PNV_XICS_BASE 0x0003ffff80000000ull >> +#define PNV_XICS_SIZE 0x0000000000100000ull >> + >> #endif /* _PPC_PNV_H */ >> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h >> index 6e5a113332c6..911cdd5e549f 100644 >> --- a/include/hw/ppc/xics.h >> +++ b/include/hw/ppc/xics.h >> @@ -119,8 +119,27 @@ struct ICPState { >> bool cap_irq_xics_enabled; >> >> XICSState *xics; >> + >> + /* >> + * for XICSNative (not used by Linux). >> + */ >> + uint32_t links[3]; >> }; >> >> +#define TYPE_XICS_NATIVE "xics-native" >> +#define XICS_NATIVE(obj) OBJECT_CHECK(XICSNative, (obj), TYPE_XICS_NATIVE) >> +#define XICS_NATIVE_CLASS(klass) \ >> + OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_NATIVE) >> +#define XICS_NATIVE_GET_CLASS(obj) \ >> + OBJECT_CLASS_CHECK(XICSStateClass, (obj), TYPE_XICS_NATIVE) >> + >> +typedef struct XICSNative { >> + XICSState parent_obj; >> + >> + MemoryRegion icp_mmio; >> + MemoryRegion *icp_mmios; >> +} XICSNative; >> + >> #define TYPE_ICS_BASE "ics-base" >> #define ICS_BASE(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_BASE) >> >> @@ -209,4 +228,9 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool >> lsi); >> >> ICSState *xics_find_source(XICSState *icp, int irq); >> >> +typedef struct PnvChip PnvChip; >> + >> +void xics_native_populate_icp(PnvChip *chip, void *fdt, int offset, >> + uint32_t base, uint32_t count); >> + >> #endif /* XICS_H */ >