On 13/10/2016 01:57, David Gibson wrote: > On real hardware, and under pHyp, the PCI host bridges on Power machines > typically advertise two outbound MMIO windows from the guest's physical > memory space to PCI memory space: > - A 32-bit window which maps onto 2GiB..4GiB in the PCI address space > - A 64-bit window which maps onto a large region somewhere high in PCI > address space (traditionally this used an identity mapping from guest > physical address to PCI address, but that's not always the case) > > The qemu implementation in spapr-pci-host-bridge, however, only supports a > single outbound MMIO window, however. At least some Linux versions expect > the two windows however, so we arranged this window to map onto the PCI > memory space from 2 GiB..~64 GiB, then advertised it as two contiguous > windows, the "32-bit" window from 2G..4G and the "64-bit" window from > 4G..~64G. > > This approach means, however, that the 64G window is not naturally aligned. > In turn this limits the size of the largest BAR we can map (which does have > to be naturally aligned) to roughly half of the total window. With some > large nVidia GPGPU cards which have huge memory BARs, this is starting to > be a problem. > > This patch adds true support for separate 32-bit and 64-bit outbound MMIO > windows to the spapr-pci-host-bridge implementation, each of which can > be independently configured. The 32-bit window always maps to 2G.. in PCI > space, but the PCI address of the 64-bit window can be configured (it > defaults to the same as the guest physical address). > > So as not to break possible existing configurations, as long as a 64-bit > window is not specified, a large single window can be specified. This > will appear the same way to the guest as the old approach, although it's > now implemented by two contiguous memory regions rather than a single one. > > For now, this only adds the possibility of 64-bit windows. The default > configuration still uses the legacy mode. > > Signed-off-by: David Gibson <da...@gibson.dropbear.id.au>
Reviewed-by: Laurent Vivier <lviv...@redhat.com> > --- > hw/ppc/spapr.c | 10 +++++-- > hw/ppc/spapr_pci.c | 70 > ++++++++++++++++++++++++++++++++++++--------- > include/hw/pci-host/spapr.h | 8 ++++-- > include/hw/ppc/spapr.h | 3 +- > 4 files changed, 72 insertions(+), 19 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index e6b110d..8db3657 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -2371,7 +2371,8 @@ static HotpluggableCPUList > *spapr_query_hotpluggable_cpus(MachineState *machine) > } > > static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, > - uint64_t *buid, hwaddr *pio, hwaddr *mmio, > + uint64_t *buid, hwaddr *pio, > + hwaddr *mmio32, hwaddr *mmio64, > unsigned n_dma, uint32_t *liobns, Error > **errp) > { > const uint64_t base_buid = 0x800000020000000ULL; > @@ -2409,7 +2410,12 @@ static void spapr_phb_placement(sPAPRMachineState > *spapr, uint32_t index, > > phb_base = phb0_base + index * phb_spacing; > *pio = phb_base + pio_offset; > - *mmio = phb_base + mmio_offset; > + *mmio32 = phb_base + mmio_offset; > + /* > + * We don't set the 64-bit MMIO window, relying on the PHB's > + * fallback behaviour of automatically splitting a large "32-bit" > + * window into contiguous 32-bit and 64-bit windows > + */ > } > > static void spapr_machine_class_init(ObjectClass *oc, void *data) > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > index 0e6cf4d..31ca6fa 100644 > --- a/hw/ppc/spapr_pci.c > +++ b/hw/ppc/spapr_pci.c > @@ -1317,14 +1317,16 @@ static void spapr_phb_realize(DeviceState *dev, Error > **errp) > if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != > (uint32_t)-1) > || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2) > || (sphb->mem_win_addr != (hwaddr)-1) > + || (sphb->mem64_win_addr != (hwaddr)-1) > || (sphb->io_win_addr != (hwaddr)-1)) { > error_setg(errp, "Either \"index\" or other parameters must" > " be specified for PAPR PHB, not both"); > return; > } > > - smc->phb_placement(spapr, sphb->index, &sphb->buid, > - &sphb->io_win_addr, &sphb->mem_win_addr, > + smc->phb_placement(spapr, sphb->index, > + &sphb->buid, &sphb->io_win_addr, > + &sphb->mem_win_addr, &sphb->mem64_win_addr, > windows_supported, sphb->dma_liobn, &local_err); > if (local_err) { > error_propagate(errp, local_err); > @@ -1353,6 +1355,38 @@ static void spapr_phb_realize(DeviceState *dev, Error > **errp) > return; > } > > + if (sphb->mem64_win_size != 0) { > + if (sphb->mem64_win_addr == (hwaddr)-1) { > + error_setg(errp, > + "64-bit memory window address not specified for PHB"); > + return; > + } > + > + if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { > + error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx > + " (max 2 GiB)", sphb->mem_win_size); > + return; > + } > + > + if (sphb->mem64_win_pciaddr == (hwaddr)-1) { > + /* 64-bit window defaults to identity mapping */ > + sphb->mem64_win_pciaddr = sphb->mem64_win_addr; > + } > + } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { > + /* > + * For compatibility with old configuration, if no 64-bit MMIO > + * window is specified, but the ordinary (32-bit) memory > + * window is specified as > 2GiB, we treat it as a 2GiB 32-bit > + * window, with a 64-bit MMIO window following on immediately > + * afterwards > + */ > + sphb->mem64_win_size = sphb->mem_win_size - SPAPR_PCI_MEM32_WIN_SIZE; > + sphb->mem64_win_addr = sphb->mem_win_addr + SPAPR_PCI_MEM32_WIN_SIZE; > + sphb->mem64_win_pciaddr = > + SPAPR_PCI_MEM_WIN_BUS_OFFSET + SPAPR_PCI_MEM32_WIN_SIZE; > + sphb->mem_win_size = SPAPR_PCI_MEM32_WIN_SIZE; > + } > + > if (spapr_pci_find_phb(spapr, sphb->buid)) { > error_setg(errp, "PCI host bridges must have unique BUIDs"); > return; > @@ -1366,12 +1400,19 @@ static void spapr_phb_realize(DeviceState *dev, Error > **errp) > sprintf(namebuf, "%s.mmio", sphb->dtbusname); > memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX); > > - sprintf(namebuf, "%s.mmio-alias", sphb->dtbusname); > - memory_region_init_alias(&sphb->memwindow, OBJECT(sphb), > + sprintf(namebuf, "%s.mmio32-alias", sphb->dtbusname); > + memory_region_init_alias(&sphb->mem32window, OBJECT(sphb), > namebuf, &sphb->memspace, > SPAPR_PCI_MEM_WIN_BUS_OFFSET, > sphb->mem_win_size); > memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr, > - &sphb->memwindow); > + &sphb->mem32window); > + > + sprintf(namebuf, "%s.mmio64-alias", sphb->dtbusname); > + memory_region_init_alias(&sphb->mem64window, OBJECT(sphb), > + namebuf, &sphb->memspace, > + sphb->mem64_win_pciaddr, sphb->mem64_win_size); > + memory_region_add_subregion(get_system_memory(), sphb->mem64_win_addr, > + &sphb->mem64window); > > /* Initialize IO regions */ > sprintf(namebuf, "%s.io", sphb->dtbusname); > @@ -1525,6 +1566,10 @@ static Property spapr_phb_properties[] = { > DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1), > DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size, > SPAPR_PCI_MMIO_WIN_SIZE), > + DEFINE_PROP_UINT64("mem64_win_addr", sPAPRPHBState, mem64_win_addr, -1), > + DEFINE_PROP_UINT64("mem64_win_size", sPAPRPHBState, mem64_win_size, 0), > + DEFINE_PROP_UINT64("mem64_win_pciaddr", sPAPRPHBState, mem64_win_pciaddr, > + -1), > DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1), > DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size, > SPAPR_PCI_IO_WIN_SIZE), > @@ -1760,10 +1805,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, > int bus_off, i, j, ret; > char nodename[FDT_NAME_MAX]; > uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; > - const uint64_t mmiosize = memory_region_size(&phb->memwindow); > - const uint64_t w32max = (1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET; > - const uint64_t w32size = MIN(w32max, mmiosize); > - const uint64_t w64size = (mmiosize > w32size) ? (mmiosize - w32size) : 0; > struct { > uint32_t hi; > uint64_t child; > @@ -1778,15 +1819,16 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, > { > cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET), > cpu_to_be64(phb->mem_win_addr), > - cpu_to_be64(w32size), > + cpu_to_be64(phb->mem_win_size), > }, > { > - cpu_to_be32(b_ss(3)), cpu_to_be64(1ULL << 32), > - cpu_to_be64(phb->mem_win_addr + w32size), > - cpu_to_be64(w64size) > + cpu_to_be32(b_ss(3)), cpu_to_be64(phb->mem64_win_pciaddr), > + cpu_to_be64(phb->mem64_win_addr), > + cpu_to_be64(phb->mem64_win_size), > }, > }; > - const unsigned sizeof_ranges = (w64size ? 3 : 2) * sizeof(ranges[0]); > + const unsigned sizeof_ranges = > + (phb->mem64_win_size ? 3 : 2) * sizeof(ranges[0]); > uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 }; > uint32_t interrupt_map_mask[] = { > cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)}; > diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h > index 8c9ebfd..23dfb09 100644 > --- a/include/hw/pci-host/spapr.h > +++ b/include/hw/pci-host/spapr.h > @@ -53,8 +53,10 @@ struct sPAPRPHBState { > bool dr_enabled; > > MemoryRegion memspace, iospace; > - hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size; > - MemoryRegion memwindow, iowindow, msiwindow; > + hwaddr mem_win_addr, mem_win_size, mem64_win_addr, mem64_win_size; > + uint64_t mem64_win_pciaddr; > + hwaddr io_win_addr, io_win_size; > + MemoryRegion mem32window, mem64window, iowindow, msiwindow; > > uint32_t dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]; > hwaddr dma_win_addr, dma_win_size; > @@ -80,6 +82,8 @@ struct sPAPRPHBState { > }; > > #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL > +#define SPAPR_PCI_MEM32_WIN_SIZE \ > + ((1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET) > > #define SPAPR_PCI_MMIO_WIN_SIZE 0xf80000000 > #define SPAPR_PCI_IO_WIN_SIZE 0x10000 > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index a05783c..aeaba3e 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -41,7 +41,8 @@ struct sPAPRMachineClass { > bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ > const char *tcg_default_cpu; /* which (TCG) CPU to simulate by default */ > void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, > - uint64_t *buid, hwaddr *pio, hwaddr *mmio, > + uint64_t *buid, hwaddr *pio, > + hwaddr *mmio32, hwaddr *mmio64, > unsigned n_dma, uint32_t *liobns, Error **errp); > }; > >