This fix SID verification failure when IOMMU IR is enabled with PCI bridges. Existing pci_requester_id() is more like getting BDF info only. Renaming it to pci_get_bdf(). Meanwhile, we provide the correct implementation to get requester ID. VT-d spec 5.1.1 is a good reference to go, though it talks only about interrupt delivery, the rule works exactly the same for non-interrupt cases.
Currently, there are three use cases for pci_requester_id(): - PCIX status bits: here we need BDF only, not requester ID. Replacing with pci_get_bdf(). - PCIe Error injection and MSI delivery: for both these cases, we are looking for requester IDs. Here we should use the new impl. To avoid a PCI walk every time we send MSI message, one requester_id field is added to PCIDevice to cache the result when we use it the first time. Here assumption is made that requester_id will never change during device lifecycle. Signed-off-by: Peter Xu <pet...@redhat.com> --- hw/i386/kvm/pci-assign.c | 2 +- hw/pci/pci.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/hw/pci/pci.h | 11 +++++++++-- 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c index bf425a2..c40ab36 100644 --- a/hw/i386/kvm/pci-assign.c +++ b/hw/i386/kvm/pci-assign.c @@ -1481,7 +1481,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp) * error bits, leave the rest. */ status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS); status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN); - status |= pci_requester_id(pci_dev); + status |= pci_get_bdf(pci_dev); status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL | PCI_X_STATUS_SPL_ERR); pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index bb605ef..0a35255 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -885,6 +885,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, } pci_dev->devfn = devfn; + pci_dev->requester_id = 0; /* Not cached */ dma_as = pci_device_iommu_address_space(pci_dev); memory_region_init_alias(&pci_dev->bus_master_enable_region, @@ -2498,6 +2499,51 @@ PCIDevice *pci_get_function_0(PCIDevice *pci_dev) } } +/* Parse bridges up to the root complex and get final Requester ID + * for this device. For full PCIe topology, this works exactly as + * what pci_get_bdf() does. However, several tricks are required + * when mixed up with legacy PCI devices and PCIe-to-PCI bridges. */ +static uint16_t pci_requester_id_no_cache(PCIDevice *dev) +{ + uint8_t bus_n; + uint16_t result = pci_get_bdf(dev); + + while (!pci_bus_is_root(dev->bus)) { + /* We are under PCI/PCIe bridges, fetch bus number of + * current bus, which is the secondary bus number of + * parent bridge. */ + bus_n = pci_bus_num(dev->bus); + dev = dev->bus->parent_dev; + if (pci_is_express(dev)) { + if (pcie_cap_get_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) { + /* When we pass through PCIe-to-PCI/PCIX bridges, we + * override the requester ID using secondary bus + * number of parent bridge with zeroed devfn + * (pcie-to-pci bridge spec chap 2.3). */ + result = PCI_BUILD_BDF(bus_n, 0); + } + } else { + /* Legacy PCI, override requester ID with the bridge's + * BDF upstream. When the root complex connects to + * legacy PCI devices (including buses), it can only + * obtain requester ID info from directly attached + * devices. If devices are attached under bridges, only + * the requester ID of the bridge that is directly + * attached to the root complex can be recognized. */ + result = pci_get_bdf(dev); + } + } + return result; +} + +uint16_t pci_requester_id(PCIDevice *dev) +{ + if (unlikely(!dev->requester_id)) { + dev->requester_id = pci_requester_id_no_cache(dev); + } + return dev->requester_id; +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index ef6ba51..cb3ab3b 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -15,6 +15,7 @@ #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) #define PCI_FUNC(devfn) ((devfn) & 0x07) +#define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn)) #define PCI_SLOT_MAX 32 #define PCI_FUNC_MAX 8 @@ -252,6 +253,10 @@ struct PCIDevice { /* the following fields are read only */ PCIBus *bus; int32_t devfn; + /* Cached requester ID, to avoid the PCI tree walking every time + * we invoke PCI request (e.g., MSI). For conventional PCI root + * complex, this field is meaningless. */ + uint16_t requester_id; char name[64]; PCIIORegion io_regions[PCI_NUM_REGIONS]; AddressSpace bus_master_as; @@ -685,11 +690,13 @@ static inline uint32_t pci_config_size(const PCIDevice *d) return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE; } -static inline uint16_t pci_requester_id(PCIDevice *dev) +static inline uint16_t pci_get_bdf(PCIDevice *dev) { - return (pci_bus_num(dev->bus) << 8) | dev->devfn; + return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); } +uint16_t pci_requester_id(PCIDevice *dev); + /* DMA access functions */ static inline AddressSpace *pci_get_address_space(PCIDevice *dev) { -- 2.4.11