On Tue, May 17, 2016 at 02:45:07PM +0800, Peter Xu wrote: > This fix SID verification failure when IOMMU IR is enabled with PCI > bridges. Existing pci_requester_id() is more like getting BDF info > only. Renaming it to pci_get_bdf(). Meanwhile, we provide the correct > implementation to get requester ID. VT-d spec 5.1.1 is a good reference > to go, though it talks only about interrupt delivery, the rule works > exactly the same for non-interrupt cases. > > Currently, there are three use cases for pci_requester_id(): > > - PCIX status bits: here we need BDF only, not requester ID. Replacing > with pci_get_bdf(). > - PCIe Error injection and MSI delivery: for both these cases, we are > looking for requester IDs. Here we should use the new impl. > > To avoid a PCI walk every time we send MSI message, one requester_id > field is added to PCIDevice to cache the result when we use it the first > time. Here assumption is made that requester_id will never change > during device lifecycle.
That's wrong though. It can change if bus number changes. That's why I said add a pointer to the actual requester, set it up during initialization, not on first use. > Signed-off-by: Peter Xu <pet...@redhat.com> > --- > hw/i386/kvm/pci-assign.c | 2 +- > hw/pci/pci.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ > include/hw/pci/pci.h | 11 +++++++++-- > 3 files changed, 56 insertions(+), 3 deletions(-) > > diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c > index bf425a2..c40ab36 100644 > --- a/hw/i386/kvm/pci-assign.c > +++ b/hw/i386/kvm/pci-assign.c > @@ -1481,7 +1481,7 @@ static int assigned_device_pci_cap_init(PCIDevice > *pci_dev, Error **errp) > * error bits, leave the rest. */ > status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS); > status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN); > - status |= pci_requester_id(pci_dev); > + status |= pci_get_bdf(pci_dev); > status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL | > PCI_X_STATUS_SPL_ERR); > pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status); > diff --git a/hw/pci/pci.c b/hw/pci/pci.c > index bb605ef..0a35255 100644 > --- a/hw/pci/pci.c > +++ b/hw/pci/pci.c > @@ -885,6 +885,7 @@ static PCIDevice *do_pci_register_device(PCIDevice > *pci_dev, PCIBus *bus, > } > > pci_dev->devfn = devfn; > + pci_dev->requester_id = 0; /* Not cached */ > dma_as = pci_device_iommu_address_space(pci_dev); > > memory_region_init_alias(&pci_dev->bus_master_enable_region, > @@ -2498,6 +2499,51 @@ PCIDevice *pci_get_function_0(PCIDevice *pci_dev) > } > } > > +/* Parse bridges up to the root complex and get final Requester ID > + * for this device. For full PCIe topology, this works exactly as > + * what pci_get_bdf() does. However, several tricks are required > + * when mixed up with legacy PCI devices and PCIe-to-PCI bridges. */ > +static uint16_t pci_requester_id_no_cache(PCIDevice *dev) > +{ > + uint8_t bus_n; > + uint16_t result = pci_get_bdf(dev); > + > + while (!pci_bus_is_root(dev->bus)) { > + /* We are under PCI/PCIe bridges, fetch bus number of > + * current bus, which is the secondary bus number of > + * parent bridge. */ > + bus_n = pci_bus_num(dev->bus); > + dev = dev->bus->parent_dev; > + if (pci_is_express(dev)) { > + if (pcie_cap_get_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) { > + /* When we pass through PCIe-to-PCI/PCIX bridges, we > + * override the requester ID using secondary bus > + * number of parent bridge with zeroed devfn > + * (pcie-to-pci bridge spec chap 2.3). */ > + result = PCI_BUILD_BDF(bus_n, 0); > + } > + } else { > + /* Legacy PCI, override requester ID with the bridge's > + * BDF upstream. When the root complex connects to > + * legacy PCI devices (including buses), it can only > + * obtain requester ID info from directly attached > + * devices. If devices are attached under bridges, only > + * the requester ID of the bridge that is directly > + * attached to the root complex can be recognized. */ > + result = pci_get_bdf(dev); > + } > + } > + return result; > +} > + > +uint16_t pci_requester_id(PCIDevice *dev) > +{ > + if (unlikely(!dev->requester_id)) { > + dev->requester_id = pci_requester_id_no_cache(dev); > + } > + return dev->requester_id; > +} > + > static const TypeInfo pci_device_type_info = { > .name = TYPE_PCI_DEVICE, > .parent = TYPE_DEVICE, > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h > index ef6ba51..cb3ab3b 100644 > --- a/include/hw/pci/pci.h > +++ b/include/hw/pci/pci.h > @@ -15,6 +15,7 @@ > #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) > #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) > #define PCI_FUNC(devfn) ((devfn) & 0x07) > +#define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn)) > #define PCI_SLOT_MAX 32 > #define PCI_FUNC_MAX 8 > > @@ -252,6 +253,10 @@ struct PCIDevice { > /* the following fields are read only */ > PCIBus *bus; > int32_t devfn; > + /* Cached requester ID, to avoid the PCI tree walking every time > + * we invoke PCI request (e.g., MSI). For conventional PCI root > + * complex, this field is meaningless. */ > + uint16_t requester_id; > char name[64]; > PCIIORegion io_regions[PCI_NUM_REGIONS]; > AddressSpace bus_master_as; > @@ -685,11 +690,13 @@ static inline uint32_t pci_config_size(const PCIDevice > *d) > return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : > PCI_CONFIG_SPACE_SIZE; > } > > -static inline uint16_t pci_requester_id(PCIDevice *dev) > +static inline uint16_t pci_get_bdf(PCIDevice *dev) > { > - return (pci_bus_num(dev->bus) << 8) | dev->devfn; > + return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); > } > > +uint16_t pci_requester_id(PCIDevice *dev); > + > /* DMA access functions */ > static inline AddressSpace *pci_get_address_space(PCIDevice *dev) > { > -- > 2.4.11