On 28.10.2011, at 03:56, David Gibson wrote: > From: Alexey Kardashevskiy <a...@au1.ibm.com> > > This patch adds a PCI bus to the pseries machine. This instantiates > the qemu generic PCI bus code, advertises a PCI host bridge in the > guest's device tree and implements the RTAS methods specified by PAPR > to access PCI config space. It also sets up the memory regions we > need to provide windows into the PCI memory and IO space, and > advertises those to the guest. > > However, because qemu can't yet emulate an IOMMU, which is mandatory on > pseries, PCI devices which use DMA (i.e. most of them) will not work with > this code alone. Still, this is enough to support the virtio_pci device > (which probably _should_ use emulated PCI DMA, but is specced to use > direct hypervisor access to guest physical memory instead). > > Signed-off-by: Alexey Kardashevskiy <a...@au1.ibm.com> > Signed-off-by: David Gibson <da...@gibson.dropbear.id.au> > --- > Makefile.target | 3 + > hw/spapr.c | 36 ++++- > hw/spapr.h | 2 + > hw/spapr_pci.c | 515 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/spapr_pci.h | 59 +++++++ > 5 files changed, 611 insertions(+), 4 deletions(-) > create mode 100644 hw/spapr_pci.c > create mode 100644 hw/spapr_pci.h > > diff --git a/Makefile.target b/Makefile.target > index fe5f6f7..f3eb842 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -248,6 +248,9 @@ obj-ppc-y += ppc_newworld.o > # IBM pSeries (sPAPR) > obj-ppc-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o > obj-ppc-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o > +ifeq ($(CONFIG_PCI),y) > +obj-ppc-$(CONFIG_PSERIES) += spapr_pci.o device-hotplug.o pci-hotplug.o > +endif
You make it conditional here ... > # PowerPC 4xx boards > obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o > obj-ppc-y += ppc440.o ppc440_bamboo.o > diff --git a/hw/spapr.c b/hw/spapr.c > index 933af32..bdaa938 100644 > --- a/hw/spapr.c > +++ b/hw/spapr.c > @@ -39,10 +39,12 @@ > > #include "hw/spapr.h" > #include "hw/spapr_vio.h" > +#include "hw/spapr_pci.h" > #include "hw/xics.h" > > #include "kvm.h" > #include "kvm_ppc.h" > +#include "pci.h" ... but not here. Just throw away the condition above. We don't need to support -M pseries without PCI. > > #include "exec-memory.h" > > @@ -62,6 +64,11 @@ > #define MAX_CPUS 256 > #define XICS_IRQS 1024 > > +#define SPAPR_PCI_BUID 0x800000020000001ULL > +#define SPAPR_PCI_MEM_WIN_ADDR (0x10000000000ULL + 0xA0000000) > +#define SPAPR_PCI_MEM_WIN_SIZE 0x20000000 > +#define SPAPR_PCI_IO_WIN_ADDR (0x10000000000ULL + 0x80000000) > + > #define PHANDLE_XICP 0x00001111 > > sPAPREnvironment *spapr; > @@ -146,6 +153,14 @@ static void *spapr_create_fdt_skel(const char *cpu_model, > &end_prop, sizeof(end_prop)))); > _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device))); > > + /* > + * Because we don't always invoke any firmware, we can't rely on > + * that to do BAR allocation. 
Long term, we should probably do > + * that ourselves, but for now, this setting (plus advertising the > + * current BARs as 0) causes sufficiently recent kernels to do the > + * BAR assignment themselves */ > + _FDT((fdt_property_cell(fdt, "linux,pci-probe-only", 0))); > + > _FDT((fdt_end_node(fdt))); > > /* memory node(s) */ > @@ -308,6 +323,7 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, > { > int ret; > void *fdt; > + sPAPRPHBState *phb; > > fdt = g_malloc(FDT_MAX_SIZE); > > @@ -320,6 +336,15 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr, > exit(1); > } > > + QLIST_FOREACH(phb, &spapr->phbs, list) { > + ret = spapr_populate_pci_devices(phb, PHANDLE_XICP, fdt); > + } > + > + if (ret < 0) { > + fprintf(stderr, "couldn't setup PCI devices in fdt\n"); > + exit(1); > + } > + > /* RTAS */ > ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size); > if (ret < 0) { > @@ -478,6 +503,12 @@ static void ppc_spapr_init(ram_addr_t ram_size, > } > } > > + /* Set up PCI */ > + spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID, > + SPAPR_PCI_MEM_WIN_ADDR, > + SPAPR_PCI_MEM_WIN_SIZE, > + SPAPR_PCI_IO_WIN_ADDR); > + > for (i = 0; i < nb_nics; i++) { > NICInfo *nd = &nd_table[i]; > > @@ -488,10 +519,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, > if (strcmp(nd->model, "ibmveth") == 0) { > spapr_vlan_create(spapr->vio_bus, 0x1000 + i, nd); > } else { > - fprintf(stderr, "pSeries (sPAPR) platform does not support " > - "NIC model '%s' (only ibmveth is supported)\n", > - nd->model); > - exit(1); > + pci_nic_init_nofail(&nd_table[i], nd->model, NULL); > } > } > > diff --git a/hw/spapr.h b/hw/spapr.h > index 6657c33..5689797 100644 > --- a/hw/spapr.h > +++ b/hw/spapr.h > @@ -2,12 +2,14 @@ > #define __HW_SPAPR_H__ > > #include "hw/xics.h" > +#include "spapr_pci.h" > > struct VIOsPAPRBus; > struct icp_state; > > typedef struct sPAPREnvironment { > struct VIOsPAPRBus *vio_bus; > + QLIST_HEAD(, sPAPRPHBState) phbs; > struct icp_state *icp; > > 
target_phys_addr_t ram_limit; > diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c > new file mode 100644 > index 0000000..a907747 > --- /dev/null > +++ b/hw/spapr_pci.c > @@ -0,0 +1,515 @@ > +/* > + * QEMU sPAPR PCI host originated from Uninorth PCI host > + * > + * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation. > + * Copyright (C) 2011 David Gibson, IBM Corporation. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > deal > + * in the Software without restriction, including without limitation the > rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. 
> + */ > +#include "hw.h" > +#include "pci.h" > +#include "pci_host.h" > +#include "hw/spapr.h" > +#include "hw/spapr_pci.h" > +#include "exec-memory.h" > +#include <libfdt.h> > + > +#include "hw/pci_internals.h" > + > +static const uint32_t bars[] = { > + PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_1, > + PCI_BASE_ADDRESS_2, PCI_BASE_ADDRESS_3, > + PCI_BASE_ADDRESS_4, PCI_BASE_ADDRESS_5 > + /*, PCI_ROM_ADDRESS*/ > +}; > + > +static PCIDevice *find_dev(sPAPREnvironment *spapr, > + uint64_t buid, uint32_t config_addr) > +{ > + DeviceState *qdev; > + int devfn = (config_addr >> 8) & 0xFF; > + sPAPRPHBState *phb; > + > + QLIST_FOREACH(phb, &spapr->phbs, list) { > + if (phb->buid != buid) { > + continue; > + } > + > + QLIST_FOREACH(qdev, &phb->host_state.bus->qbus.children, sibling) { > + PCIDevice *dev = (PCIDevice *)qdev; > + if (dev->devfn == devfn) { > + return dev; > + } > + } > + } > + > + return NULL; > +} > + > +static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr, > + uint32_t token, uint32_t nargs, > + target_ulong args, > + uint32_t nret, target_ulong rets) > +{ > + uint32_t val, size, addr; > + uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); > + PCIDevice *dev = find_dev(spapr, buid, rtas_ld(args, 0)); > + > + if (!dev) { > + rtas_st(rets, 0, -1); > + return; > + } > + size = rtas_ld(args, 3); > + addr = rtas_ld(args, 0) & 0xFF; > + val = pci_default_read_config(dev, addr, size); > + rtas_st(rets, 0, 0); > + rtas_st(rets, 1, val); > +} > + > +static void rtas_read_pci_config(sPAPREnvironment *spapr, > + uint32_t token, uint32_t nargs, > + target_ulong args, > + uint32_t nret, target_ulong rets) > +{ > + uint32_t val, size, addr; > + PCIDevice *dev = find_dev(spapr, 0, rtas_ld(args, 0)); > + > + if (!dev) { > + rtas_st(rets, 0, -1); > + return; > + } > + size = rtas_ld(args, 1); > + addr = rtas_ld(args, 0) & 0xFF; > + val = pci_default_read_config(dev, addr, size); > + rtas_st(rets, 0, 0); > + rtas_st(rets, 1, val); > +} > + > +static 
void rtas_ibm_write_pci_config(sPAPREnvironment *spapr, > + uint32_t token, uint32_t nargs, > + target_ulong args, > + uint32_t nret, target_ulong rets) > +{ > + uint32_t val, size, addr; > + uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); > + PCIDevice *dev = find_dev(spapr, buid, rtas_ld(args, 0)); > + > + if (!dev) { > + rtas_st(rets, 0, -1); > + return; > + } > + val = rtas_ld(args, 4); > + size = rtas_ld(args, 3); > + addr = rtas_ld(args, 0) & 0xFF; > + pci_default_write_config(dev, addr, val, size); > + rtas_st(rets, 0, 0); > +} > + > +static void rtas_write_pci_config(sPAPREnvironment *spapr, > + uint32_t token, uint32_t nargs, > + target_ulong args, > + uint32_t nret, target_ulong rets) > +{ > + uint32_t val, size, addr; > + PCIDevice *dev = find_dev(spapr, 0, rtas_ld(args, 0)); > + > + if (!dev) { > + rtas_st(rets, 0, -1); > + return; > + } > + val = rtas_ld(args, 2); > + size = rtas_ld(args, 1); > + addr = rtas_ld(args, 0) & 0xFF; > + pci_default_write_config(dev, addr, val, size); > + rtas_st(rets, 0, 0); > +} > + > +static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num) > +{ > + /* > + * Here we need to convert pci_dev + irq_num to some unique value > + * which is less than number of IRQs on the specific bus (now it > + * is 16). At the moment irq_num == device_id (number of the > + * slot?) > + * FIXME: we should swizzle in fn and irq_num > + */ > + return (pci_dev->devfn >> 3) % SPAPR_PCI_NUM_LSI; > +} > + > +static void pci_spapr_set_irq(void *opaque, int irq_num, int level) > +{ > + /* > + * Here we use the number returned by pci_spapr_map_irq to find a > + * corresponding qemu_irq. 
> + */ > + sPAPRPHBState *phb = opaque; > + > + qemu_set_irq(phb->lsi_table[irq_num].qirq, level); > +} > + > +static int spapr_phb_init(SysBusDevice *s) > +{ > + sPAPRPHBState *phb = FROM_SYSBUS(sPAPRPHBState, s); > + int i; > + > + /* Initialize the LSI table */ > + for (i = 0; i < SPAPR_PCI_NUM_LSI; i++) { > + qemu_irq qirq; > + uint32_t num; > + > + qirq = spapr_allocate_irq(0, &num); > + if (!qirq) { > + return -1; > + } > + > + phb->lsi_table[i].dt_irq = num; > + phb->lsi_table[i].qirq = qirq; > + } > + > + return 0; > +} > + > +static int spapr_main_pci_host_init(PCIDevice *d) > +{ > + return 0; > +} > + > +static PCIDeviceInfo spapr_main_pci_host_info = { > + .qdev.name = "spapr-pci-host-bridge", > + .qdev.size = sizeof(PCIDevice), > + .init = spapr_main_pci_host_init, > +}; > + > +static void spapr_register_devices(void) > +{ > + sysbus_register_dev("spapr-pci-host-bridge", sizeof(sPAPRPHBState), > + spapr_phb_init); > + pci_qdev_register(&spapr_main_pci_host_info); > +} > + > +device_init(spapr_register_devices) > + > +static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr, > + unsigned size) > +{ > + switch (size) { > + case 1: > + return cpu_inb(addr); > + case 2: > + return cpu_inw(addr); > + case 4: > + return cpu_inl(addr); > + } > + assert(0); > +} > + > +static void spapr_io_write(void *opaque, target_phys_addr_t addr, > + uint64_t data, unsigned size) > +{ > + switch (size) { > + case 1: > + cpu_outb(addr, data); > + return; > + case 2: > + cpu_outw(addr, data); > + return; > + case 4: > + cpu_outl(addr, data); > + return; > + } > + assert(0); > +} > + > +static MemoryRegionOps spapr_io_ops = { > + .endianness = DEVICE_LITTLE_ENDIAN, > + .read = spapr_io_read, > + .write = spapr_io_write > +}; > + > +void spapr_create_phb(sPAPREnvironment *spapr, > + const char *busname, uint64_t buid, > + uint64_t mem_win_addr, uint64_t mem_win_size, > + uint64_t io_win_addr) > +{ > + DeviceState *dev; > + SysBusDevice *s; > + sPAPRPHBState *phb; > + 
PCIBus *bus; > + char namebuf[strlen(busname)+11]; > + > + dev = qdev_create(NULL, "spapr-pci-host-bridge"); > + qdev_init_nofail(dev); > + s = sysbus_from_qdev(dev); > + phb = FROM_SYSBUS(sPAPRPHBState, s); > + > + phb->mem_win_addr = mem_win_addr; > + > + sprintf(namebuf, "%s-mem", busname); > + memory_region_init(&phb->memspace, namebuf, INT64_MAX); > + > + sprintf(namebuf, "%s-memwindow", busname); > + memory_region_init_alias(&phb->memwindow, namebuf, &phb->memspace, > + SPAPR_PCI_MEM_WIN_BUS_OFFSET, mem_win_size); > + memory_region_add_subregion(get_system_memory(), mem_win_addr, > + &phb->memwindow); > + > + phb->io_win_addr = io_win_addr; > + > + /* On ppc, we only have MMIO, no specific IO space from the CPU > + * perspective. In theory we ought to be able to embed the PCI IO > + * memory region directly in the system memory space. However, > + * if any of the IO BAR subregions use the old_portio mechanism, > + * that won't be processed properly unless accessed from the > + * system io address space. 
This hack to bounce things via > + * system_io works around the problem until all the users of > + * old_portio are updated */ > + sprintf(namebuf, "%s-io", busname); > + memory_region_init(&phb->iospace, namebuf, SPAPR_PCI_IO_WIN_SIZE); > + /* FIXME: fix to support multiple PHBs */ > + memory_region_add_subregion(get_system_io(), 0, &phb->iospace); > + > + sprintf(namebuf, "%s-iowindow", busname); > + memory_region_init_io(&phb->iowindow, &spapr_io_ops, phb, > + namebuf, SPAPR_PCI_IO_WIN_SIZE); > + memory_region_add_subregion(get_system_memory(), io_win_addr, > + &phb->iowindow); > + > + phb->host_state.bus = bus = pci_register_bus(&phb->busdev.qdev, busname, > + pci_spapr_set_irq, > + pci_spapr_map_irq, > + phb, > + &phb->memspace, > &phb->iospace, > + PCI_DEVFN(0, 0), > + SPAPR_PCI_NUM_LSI); > + > + spapr_rtas_register("read-pci-config", rtas_read_pci_config); > + spapr_rtas_register("write-pci-config", rtas_write_pci_config); > + spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config); > + spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config); > + > + /* > + * This is a workaround to disable PCI devices resetting as we do > + * BAR allocation on the QEMU side and reset destroys this > + * configuration. > + */ > + bus->qbus.info->reset = NULL; Eh. What? So you're breaking reset for all PCI devices? How do you reboot? Alex