On Fri, Jun 21, 2013 at 12:08:58AM +1000, Alexey Kardashevskiy wrote: > At the moment QEMU creates a route for every MSI IRQ. > > Now we are about to add IRQFD support on PPC64-pseries platform. > pSeries already has in-kernel emulated interrupt controller with > 8192 IRQs. Also, pSeries PHB already supports MSIMessage to IRQ > mapping as a part of PAPR requirements for MSI/MSIX guests. > Specifically, the pSeries guest does not touch MSIMessage's at > all, instead it uses rtas_ibm_change_msi and rtas_ibm_query_interrupt_source > rtas calls to do the mapping. > > Therefore we do not really need more routing than we got already. > The patch introduces the infrastructure to enable direct IRQ mapping. > > Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> > > --- > > The patch is raw and ugly indeed, I made it only to demonstrate > the idea and see if it has right to live or not. > > For some reason which I do not really understand (limited GSI numbers?) > the existing code always adds routing and I do not see why we would need it. > > Thanks! > --- > hw/misc/vfio.c | 11 +++++++++-- > hw/pci/pci.c | 13 +++++++++++++ > hw/ppc/spapr_pci.c | 13 +++++++++++++ > hw/virtio/virtio-pci.c | 26 ++++++++++++++++++++------ > include/hw/pci/pci.h | 4 ++++ > include/hw/pci/pci_bus.h | 1 + > 6 files changed, 60 insertions(+), 8 deletions(-) > > diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c > index 14aac04..2d9eef7 100644 > --- a/hw/misc/vfio.c > +++ b/hw/misc/vfio.c > @@ -639,7 +639,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, > unsigned int nr, > * Attempt to enable route through KVM irqchip, > * default to userspace handling if unavailable. > */ > - vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1; > + > + vector->virq = msg ? pci_bus_map_msi(vdev->pdev.bus, *msg) : -1; > + if (vector->virq < 0) { > + vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1; > + } > if (vector->virq < 0 || > kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt, > vector->virq) < 0) { > @@ -807,7 +811,10 @@ retry: > * Attempt to enable route through KVM irqchip, > * default to userspace handling if unavailable. > */ > - vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg); > + vector->virq = pci_bus_map_msi(vdev->pdev.bus, msg); > + if (vector->virq < 0) { > + vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg); > + } > if (vector->virq < 0 || > kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt, > vector->virq) < 0) { > diff --git a/hw/pci/pci.c b/hw/pci/pci.c > index a976e46..a9875e9 100644 > --- a/hw/pci/pci.c > +++ b/hw/pci/pci.c > @@ -1254,6 +1254,19 @@ void pci_device_set_intx_routing_notifier(PCIDevice > *dev, > dev->intx_routing_notifier = notifier; > } > > +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn) > +{ > + bus->map_msi = map_msi_fn; > +} > + > +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg) > +{ > + if (bus->map_msi) { > + return bus->map_msi(bus, msg); > + } > + return -1; > +} > + > /* > * PCI-to-PCI bridge specification > * 9.1: Interrupt routing. Table 9-1 > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > index 80408c9..9ef9a29 100644 > --- a/hw/ppc/spapr_pci.c > +++ b/hw/ppc/spapr_pci.c > @@ -500,6 +500,18 @@ static void spapr_msi_write(void *opaque, hwaddr addr, > qemu_irq_pulse(xics_get_qirq(spapr->icp, irq)); > } > > +static int spapr_msi_get_irq(PCIBus *bus, MSIMessage msg) > +{ > + DeviceState *par = bus->qbus.parent; > + sPAPRPHBState *sphb = (sPAPRPHBState *) par; > + unsigned long addr = msg.address - sphb->msi_win_addr; > + int ndev = addr >> 16; > + int vec = ((addr & 0xFFFF) >> 2) | msg.data; > + uint32_t irq = sphb->msi_table[ndev].irq + vec;
This array seems to be SPAPR_MSIX_MAX_DEVS in size, no? Won't this overflow if ndev is large? > + > + return (int)irq; > +} > + > static const MemoryRegionOps spapr_msi_ops = { > /* There is no .read as the read result is undefined by PCI spec */ > .read = NULL, > @@ -664,6 +676,7 @@ static int _spapr_phb_init(SysBusDevice *s) > > sphb->lsi_table[i].irq = irq; > } > + pci_bus_set_map_msi_fn(bus, spapr_msi_get_irq); > > return 0; > } > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > index d309416..587f53e 100644 > --- a/hw/virtio/virtio-pci.c > +++ b/hw/virtio/virtio-pci.c > @@ -472,6 +472,8 @@ static unsigned virtio_pci_get_features(DeviceState *d) > return proxy->host_features; > } > > +extern int spapr_msi_get_irq(PCIBus *bus, MSIMessage *msg); > + > static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, > unsigned int queue_no, > unsigned int vector, > @@ -481,7 +483,10 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy > *proxy, > int ret; > > if (irqfd->users == 0) { > - ret = kvm_irqchip_add_msi_route(kvm_state, msg); > + ret = pci_bus_map_msi(proxy->pci_dev.bus, msg); > + if (ret < 0) { > + ret = kvm_irqchip_add_msi_route(kvm_state, msg); > + } > if (ret < 0) { > return ret; > } > @@ -609,14 +614,23 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy > *proxy, > VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); > EventNotifier *n = virtio_queue_get_guest_notifier(vq); > VirtIOIRQFD *irqfd; > - int ret = 0; > + int ret = 0, tmp; > > if (proxy->vector_irqfd) { > irqfd = &proxy->vector_irqfd[vector]; > - if (irqfd->msg.data != msg.data || irqfd->msg.address != > msg.address) { > - ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg); > - if (ret < 0) { > - return ret; > + > + tmp = pci_bus_map_msi(proxy->pci_dev.bus, msg); > + if (tmp >= 0) { > + if (irqfd->virq != tmp) { > + fprintf(stderr, "FIXME: MSI(-X) vector has changed from %X > to %x\n", > + irqfd->virq, tmp); > + } > + } else { > + if (irqfd->msg.data != msg.data || irqfd->msg.address != > msg.address) { > + ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, > msg); > + if (ret < 0) { > + return ret; > + } > } > } > } > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h > index 8797802..632739a 100644 > --- a/include/hw/pci/pci.h > +++ b/include/hw/pci/pci.h > @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev); > typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level); > typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num); > typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); > +typedef int (*pci_map_msi_fn)(PCIBus *bus, MSIMessage msg); > > typedef enum { > PCI_HOTPLUG_DISABLED, > @@ -375,6 +376,9 @@ bool pci_intx_route_changed(PCIINTxRoute *old, > PCIINTxRoute *new); > void pci_bus_fire_intx_routing_notifier(PCIBus *bus); > void pci_device_set_intx_routing_notifier(PCIDevice *dev, > PCIINTxRoutingNotifier notifier); > +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn); > +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg); > + > void pci_device_reset(PCIDevice *dev); > void pci_bus_reset(PCIBus *bus); > > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h > index 66762f6..81efd2b 100644 > --- a/include/hw/pci/pci_bus.h > +++ b/include/hw/pci/pci_bus.h > @@ -16,6 +16,7 @@ struct PCIBus { > pci_set_irq_fn set_irq; > pci_map_irq_fn map_irq; > pci_route_irq_fn route_intx_to_irq; > + pci_map_msi_fn map_msi; > pci_hotplug_fn hotplug; > DeviceState *hotplug_qdev; > void *irq_opaque; > -- > 1.7.10.4