Hi, On Mon, Jan 5, 2015 at 6:13 PM, Alexander Graf <ag...@suse.de> wrote: > > > On 21.11.14 19:07, Alvise Rigo wrote: >> Add a generic PCI host controller for virtual platforms, based on the >> previous work by Rob Herring: >> http://lists.gnu.org/archive/html/qemu-devel/2014-06/msg03482.html >> >> The controller creates a PCI bus for PCI devices; it is intended to >> receive from the platform all the needed information to initiate the >> memory regions expected by the PCI system. Due to this dependence, a >> header file is used to define the structure that the platform has to >> fill with the data needed by the controller. The device provides also a >> routine for the device tree node generation, which mostly has to take >> care of the interrupt-map property generation. This property is fetched >> by the guest operating system to map any PCI interrupt to the interrupt >> controller. For the time being, the device expects a GIC v2 to be used >> by the guest. >> Only mach-virt has been used to test the controller. >> >> Signed-off-by: Alvise Rigo <a.r...@virtualopensystems.com> >> --- >> hw/pci-host/Makefile.objs | 2 +- >> hw/pci-host/generic-pci.c | 280 >> ++++++++++++++++++++++++++++++++++++++ >> include/hw/pci-host/generic-pci.h | 74 ++++++++++ >> 3 files changed, 355 insertions(+), 1 deletion(-) >> create mode 100644 hw/pci-host/generic-pci.c >> create mode 100644 include/hw/pci-host/generic-pci.h >> >> diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs >> index bb65f9c..8ef9fac 100644 >> --- a/hw/pci-host/Makefile.objs >> +++ b/hw/pci-host/Makefile.objs >> @@ -1,4 +1,4 @@ >> -common-obj-y += pam.o >> +common-obj-y += pam.o generic-pci.o >> >> # PPC devices >> common-obj-$(CONFIG_PREP_PCI) += prep.o >> diff --git a/hw/pci-host/generic-pci.c b/hw/pci-host/generic-pci.c >> new file mode 100644 >> index 0000000..9c90263 >> --- /dev/null >> +++ b/hw/pci-host/generic-pci.c >> @@ -0,0 +1,280 @@ >> +/* >> + * Generic PCI host controller >> + * >> + * Copyright (c) 2014 Linaro, Ltd. >> + * Author: Rob Herring <rob.herr...@linaro.org> >> + * >> + * Based on ARM Versatile PCI controller (hw/pci-host/versatile.c): >> + * Copyright (c) 2006-2009 CodeSourcery. >> + * Written by Paul Brook >> + * >> + * This code is licensed under the LGPL. >> + */ >> + >> +#include "hw/sysbus.h" >> +#include "hw/pci-host/generic-pci.h" >> +#include "exec/address-spaces.h" >> +#include "sysemu/device_tree.h" >> + >> +static const VMStateDescription pci_generic_host_vmstate = { >> + .name = "generic-host-pci", >> + .version_id = 1, >> + .minimum_version_id = 1, >> +}; >> + >> +static void pci_cam_config_write(void *opaque, hwaddr addr, >> + uint64_t val, unsigned size) >> +{ >> + PCIGenState *s = opaque; >> + pci_data_write(&s->pci_bus, addr, val, size); >> +} >> + >> +static uint64_t pci_cam_config_read(void *opaque, hwaddr addr, unsigned >> size) >> +{ >> + PCIGenState *s = opaque; >> + uint32_t val; >> + val = pci_data_read(&s->pci_bus, addr, size); >> + return val; >> +} >> + >> +static const MemoryRegionOps pci_vpb_config_ops = { >> + .read = pci_cam_config_read, >> + .write = pci_cam_config_write, >> + .endianness = DEVICE_NATIVE_ENDIAN, > > Never use NATIVE_ENDIAN unless you're 100% sure it's correct. In this > case, please make it LITTLE_ENDIAN.
Agreed. > >> +}; >> + >> +static void pci_generic_set_irq(void *opaque, int irq_num, int level) >> +{ >> + qemu_irq *pic = opaque; >> + qemu_set_irq(pic[irq_num], level); >> +} >> + >> +static void pci_generic_host_init(Object *obj) >> +{ >> + PCIHostState *h = PCI_HOST_BRIDGE(obj); >> + PCIGenState *s = PCI_GEN(obj); >> + GenericPCIHostState *gps = &s->pci_gen; >> + >> + memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32); > > Why is IO space that big? It's only 64k usually, no? This was just to prevent a definition of the io space smaller than the actual size of the memory region. This could be avoided as you suggested later, by postponing the creation of the PCIBus. > >> + memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32); >> + >> + pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), "pci", >> + &s->pci_mem_space, &s->pci_io_space, >> + PCI_DEVFN(0, 0), TYPE_PCIE_BUS); >> + h->bus = &s->pci_bus; >> + >> + object_initialize(gps, sizeof(*gps), TYPE_GENERIC_PCI_HOST); >> + qdev_set_parent_bus(DEVICE(gps), BUS(&s->pci_bus)); >> + gps->devfns = 0; >> +} >> + >> +static int generic_pci_map_irq_fn(PCIDevice *pci_dev, int pin) >> +{ >> + BusState *bus = qdev_get_parent_bus(&pci_dev->qdev); >> + PCIBus *pci_bus = PCI_BUS(bus); >> + PCIDevice *pdev = pci_bus->devices[PCI_DEVFN(0, 0)]; >> + GenericPCIHostState *gps = PCI_GEN_HOST(pdev); >> + >> + return gps->irqmap.devfn_idx_map[PCI_SLOT(pci_dev->devfn)] >> + [PCI_FUNC(pci_dev->devfn)]; >> +} >> + >> +static void pci_generic_host_realize(DeviceState *dev, Error **errp) >> +{ >> + PCIGenState *s = PCI_GEN(dev); >> + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); >> + GenericPCIPlatformData *pdata = &s->plat_data; > > Where does this come from? Why isn't it exposed as qdev parameters? It comes from the platform (e.g. mach-virt). Indeed, I will expose them as qdev properties. > >> + int i; >> + >> + if (!pdata->addr_map[PCI_CFG].size || !pdata->addr_map[PCI_IO].size || >> + !pdata->addr_map[PCI_MEM].size || !pdata->irqs) { >> + hw_error("generic_pci: PCI controller not fully configured."); >> + } >> + >> + for (i = 0; i < pdata->irqs; i++) { >> + sysbus_init_irq(sbd, &s->irq[i]); >> + } >> + >> + pci_bus_irqs(&s->pci_bus, pci_generic_set_irq, generic_pci_map_irq_fn, >> + s->irq, pdata->irqs); >> + >> + memory_region_init_io(&s->mem_config, OBJECT(s), &pci_vpb_config_ops, s, >> + "pci-config", pdata->addr_map[PCI_CFG].size); >> + sysbus_init_mmio(sbd, &s->mem_config); >> + >> + /* Depending on the available memory of the board using the controller, >> we >> + create a window on both the I/O and mememory space. */ > > meme? Ack. > >> + memory_region_init_alias(&s->pci_io_window, OBJECT(s), "pci-io-win", >> + &s->pci_io_space, 0, >> + pdata->addr_map[PCI_IO].size); >> + sysbus_init_mmio(sbd, &s->pci_io_window); >> + >> + memory_region_init_alias(&s->pci_mem_window, OBJECT(s), "pci-mem-win", >> + &s->pci_mem_space, >> + pdata->addr_map[PCI_MEM].addr, >> + pdata->addr_map[PCI_MEM].size); >> + sysbus_init_mmio(sbd, &s->pci_mem_window); > > What if we simply postpone the creation of those regions and the > creation of the PCIBus to the realize function? At that point we know > the size of the regions. I'm not sure but I think I've already tried this path and for some reason I've abandoned it. I will explore it again and I'll let you know. > >> + >> + /* TODO Remove once realize propagates to child devices. */ >> + object_property_set_bool(OBJECT(&s->pci_gen), true, "realized", errp); > > Is this still necessary? In fact, wouldn't the realize of our PHB and > the creation of child devices happen consecutively usually? Sure, I will fix it. > >> +} >> + >> +static void pci_generic_host_class_init(ObjectClass *klass, void *data) >> +{ >> + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); >> + DeviceClass *dc = DEVICE_CLASS(klass); >> + >> + k->vendor_id = PCI_VENDOR_ID_REDHAT; >> + k->device_id = 0x1234; > > Is this a reserved ID? Not at all. I see now from the documentation (docs/specs/pci-ids.txt) that a value within the range 0000 -> 00ff should be used. I suppose that eventually we will pick up one of the available? > >> + k->class_id = PCI_CLASS_PROCESSOR_CO; >> + /* >> + * PCI-facing part of the host bridge, not usable without the >> + * host-facing part, which can't be device_add'ed, yet. >> + */ >> + dc->cannot_instantiate_with_device_add_yet = true; >> +} >> + >> +struct dt_irq_mapping { >> + int irqs; >> + uint32_t gic_phandle; >> + int base_irq_num; >> + uint64_t *data; >> +}; >> + >> +/* */ >> +#define IRQ_MAP_ENTRY_DESC_SZ 14 >> +static void gen_int_mapping_fn(PCIBus *b, PCIDevice *d, void *opaque) >> +{ >> + struct dt_irq_mapping *map_data; >> + int pin; >> + >> + PCIDevice *pdev = b->devices[PCI_DEVFN(0, 0)]; >> + GenericPCIHostState *gps = PCI_GEN_HOST(pdev); >> + map_data = (struct dt_irq_mapping *)opaque; >> + >> + /* Check if the platform has enough IRQs available. */ >> + if (gps->devfns > map_data->irqs) { >> + hw_error("generic_pci: too many PCI devices."); >> + } >> + >> + pin = pci_get_byte(d->config + PCI_INTERRUPT_PIN); >> + if (pin) { >> + uint64_t *data_ptr = map_data->data + (IRQ_MAP_ENTRY_DESC_SZ * >> gps->devfns); >> + uint8_t pci_slot = PCI_SLOT(d->devfn); >> + uint8_t pci_func = PCI_FUNC(d->devfn); >> + uint8_t *devfn_idx = &gps->irqmap.devfn_idx_map[pci_slot][0]; >> + uint8_t *devfn_irq = gps->irqmap.devfn_irq_map; >> + >> + devfn_idx[pci_func] = gps->devfns; >> + devfn_irq[gps->devfns] = map_data->base_irq_num + gps->devfns; >> + >> + uint64_t buffer[IRQ_MAP_ENTRY_DESC_SZ] = >> + {1, (pci_slot << 11)|(pci_func << 8), 2, 0x00000000, 1, pin, >> + 1, map_data->gic_phandle, 1, 0x0, 1, devfn_irq[gps->devfns], >> + 1, 0x1}; >> + >> + memcpy(data_ptr, buffer, IRQ_MAP_ENTRY_DESC_SZ * sizeof(*buffer)); >> + gps->devfns++; >> + } >> +} >> + >> +/* Generate the "interrup-map" node's data and store it in map_data */ >> +static void generate_int_mapping(struct dt_irq_mapping *map_data, >> + PCIGenState *s) >> +{ >> + pci_for_each_device(s->parent_obj.bus, 0, gen_int_mapping_fn, map_data); > > The interrupt-map should describe interrupt mappings for every slot, not > every device. If you only describe the current state of affairs, hotplug > won't work. Ack, as agreed also with Peter in the other thread. > >> +} >> + >> +static void generate_dt_node(void *fdt, DeviceState *dev) >> +{ >> + PCIGenState *s = PCI_GEN(dev); >> + char *nodename; >> + uint32_t gic_phandle; >> + uint32_t plat_acells; >> + uint32_t plat_scells; >> + >> + SysBusDevice *busdev; >> + busdev = SYS_BUS_DEVICE(dev); >> + MemoryRegion *cfg = sysbus_mmio_get_region(busdev, 0); >> + MemoryRegion *io = sysbus_mmio_get_region(busdev, 1); >> + MemoryRegion *mem = sysbus_mmio_get_region(busdev, 2); >> + >> + nodename = g_strdup_printf("/pci@%" PRIx64, cfg->addr); >> + qemu_fdt_add_subnode(fdt, nodename); >> + qemu_fdt_setprop_string(fdt, nodename, "compatible", >> + "pci-host-cam-generic"); >> + qemu_fdt_setprop_string(fdt, nodename, "device_type", "pci"); >> + qemu_fdt_setprop_cell(fdt, nodename, "#address-cells", 0x3); >> + qemu_fdt_setprop_cell(fdt, nodename, "#size-cells", 0x2); >> + qemu_fdt_setprop_cell(fdt, nodename, "#interrupt-cells", 0x1); >> + >> + plat_acells = qemu_fdt_getprop_cell(fdt, "/", "#address-cells"); >> + plat_scells = qemu_fdt_getprop_cell(fdt, "/", "#size-cells"); >> + qemu_fdt_setprop_sized_cells(fdt, nodename, "reg", plat_acells, >> cfg->addr, >> + plat_scells, >> memory_region_size(cfg)); >> + >> + qemu_fdt_setprop_sized_cells(fdt, nodename, "ranges", >> + 1, 0x01000000, 2, 0x00000000, /* I/O space */ >> + 2, io->addr, 2, memory_region_size(io), >> + 1, 0x02000000, 2, mem->addr, /* 32bit memory space >> */ >> + 2, mem->addr, 2, memory_region_size(mem)); >> + >> + gic_phandle = qemu_fdt_get_phandle(fdt, s->plat_data.gic_node_name); >> + /* A mask covering bits [15,8] of phys.high allows to honor the >> + function number when resolving a triggered PCI interrupt. */ >> + qemu_fdt_setprop_sized_cells(fdt, nodename, "interrupt-map-mask", >> + 1, 0xff00, 1, 0x0, 1, 0x0, 1, 0x7); >> + >> + uint64_t *int_mapping_data = g_malloc0(IRQ_MAP_ENTRY_DESC_SZ * >> + sizeof(uint64_t) * >> s->plat_data.irqs); >> + struct dt_irq_mapping dt_map_data = { >> + .irqs = s->plat_data.irqs, >> + .gic_phandle = gic_phandle, >> + .base_irq_num = s->plat_data.base_irq, >> + .data = int_mapping_data >> + }; >> + /* Generate the interrupt mapping according to the devices attached >> + * to the PCI bus of the controller. */ >> + generate_int_mapping(&dt_map_data, s); >> + qemu_fdt_setprop_sized_cells_from_array(fdt, nodename, "interrupt-map", >> + (s->pci_gen.devfns * IRQ_MAP_ENTRY_DESC_SZ)/2, >> int_mapping_data); >> + >> + g_free(int_mapping_data); >> + g_free(nodename); >> +} >> + >> +void pci_controller_build_dt_node(void *fdt, DeviceState *dev) >> +{ >> + generate_dt_node(fdt, dev); > > Device tree generation should *never* be part of device code. It's > machine / board specific. If one day someone can convince me that it's Actually you already convinced me that the device tree generation should stay in the board code :) I will rework the code accordingly. Thank you for your feedback, alvise > safe to share device tree bits of one device with multiple boards I'm > happy to work with them to achieve that goal then, but for now, please > leave device tree bits in the machine logic. > > > Alex > >> +} >> + >> +static const TypeInfo pci_generic_host_info = { >> + .name = TYPE_GENERIC_PCI_HOST, >> + .parent = TYPE_PCI_DEVICE, >> + .instance_size = sizeof(GenericPCIHostState), >> + .class_init = pci_generic_host_class_init, >> +}; >> + >> +static void pci_generic_class_init(ObjectClass *klass, void *data) >> +{ >> + DeviceClass *dc = DEVICE_CLASS(klass); >> + >> + dc->realize = pci_generic_host_realize; >> + dc->vmsd = &pci_generic_host_vmstate; >> +} >> + >> +static const TypeInfo pci_generic_info = { >> + .name = TYPE_GENERIC_PCI, >> + .parent = TYPE_PCI_HOST_BRIDGE, >> + .instance_size = sizeof(PCIGenState), >> + .instance_init = pci_generic_host_init, >> + .class_init = pci_generic_class_init, >> +}; >> + >> +static void generic_pci_host_register_types(void) >> +{ >> + type_register_static(&pci_generic_info); >> + type_register_static(&pci_generic_host_info); >> +} >> + >> +type_init(generic_pci_host_register_types) >> \ No newline at end of file >> diff --git a/include/hw/pci-host/generic-pci.h >> b/include/hw/pci-host/generic-pci.h >> new file mode 100644 >> index 0000000..43c7a0f >> --- /dev/null >> +++ b/include/hw/pci-host/generic-pci.h >> @@ -0,0 +1,74 @@ >> +#ifndef QEMU_GENERIC_PCI_H >> +#define QEMU_GENERIC_PCI_H >> + >> +#include "hw/pci/pci.h" >> +#include "hw/pci/pci_bus.h" >> +#include "hw/pci/pci_host.h" >> + >> +#define MAX_PCI_DEVICES (PCI_SLOT_MAX * PCI_FUNC_MAX) >> + >> +enum { >> + PCI_CFG = 0, >> + PCI_IO, >> + PCI_MEM, >> +}; >> + >> +typedef struct { >> + /* addr_map[PCI_CFG].addr = base address of configuration memory >> + addr_map[PCI_CFG].size = configuration memory size >> + ... */ >> + struct addr_map { >> + hwaddr addr; >> + hwaddr size; >> + } addr_map[3]; >> + >> + const char *gic_node_name; >> + uint32_t base_irq; >> + uint32_t irqs; >> +} GenericPCIPlatformData; >> + >> +typedef struct { >> + PCIDevice parent_obj; >> + >> + struct irqmap { >> + /* devfn_idx_map[i][j] = index inside slot_irq_map for device at >> slot i, >> + fn j */ >> + uint8_t devfn_idx_map[PCI_SLOT_MAX][PCI_FUNC_MAX]; >> + /* devfn_irq_map[i] = IRQ num. of the i-th device (devfn) attached >> to >> + the bus */ >> + uint8_t devfn_irq_map[MAX_PCI_DEVICES]; >> + } irqmap; >> + /* number of devices attached to the bus */ >> + uint32_t devfns; >> +} GenericPCIHostState; >> + >> +typedef struct { >> + PCIHostState parent_obj; >> + >> + qemu_irq irq[MAX_PCI_DEVICES]; >> + MemoryRegion mem_config; >> + /* Containers representing the PCI address spaces */ >> + MemoryRegion pci_io_space; >> + MemoryRegion pci_mem_space; >> + /* Alias regions into PCI address spaces which we expose as sysbus >> regions. >> + * The offsets into pci_mem_space is fixed. */ >> + MemoryRegion pci_io_window; >> + MemoryRegion pci_mem_window; >> + PCIBus pci_bus; >> + GenericPCIHostState pci_gen; >> + >> + /* Platform dependent data */ >> + GenericPCIPlatformData plat_data; >> +} PCIGenState; >> + >> +#define TYPE_GENERIC_PCI "generic_pci" >> +#define PCI_GEN(obj) \ >> + OBJECT_CHECK(PCIGenState, (obj), TYPE_GENERIC_PCI) >> + >> +#define TYPE_GENERIC_PCI_HOST "generic_pci_host" >> +#define PCI_GEN_HOST(obj) \ >> + OBJECT_CHECK(GenericPCIHostState, (obj), TYPE_GENERIC_PCI_HOST) >> + >> +void pci_controller_build_dt_node(void *fdt, DeviceState *dev); >> + >> +#endif >> \ No newline at end of file >>