On Fri, Aug 24, 2012 at 10:21:44AM +0200, Jan Kiszka wrote: > On 2012-08-24 10:20, Michael S. Tsirkin wrote: > > On Fri, Aug 24, 2012 at 08:19:50AM +0200, Jan Kiszka wrote: > >> From: Jan Kiszka <jan.kis...@siemens.com> > >> > >> This optimization was once used in qemu-kvm to keep KVM route usage low. > >> But now we solved that problem via lazy updates. It also tried to handle > >> the case of vectors shared between different sources of the same device. > >> However, this never really worked and will have to be addressed > >> differently anyway. So drop this obsolete interface. > >> > >> We still need interfaces to clear pending vectors though. Provide > >> msix_clear_vector and msix_clear_all_vectors for this. > >> > >> This also unbreaks MSI-X after reset for ivshmem and megasas as device > >> models can't easily mark their vectors used afterward (megasas didn't > >> even try). > >> > >> Signed-off-by: Jan Kiszka <jan.kis...@siemens.com> > >> --- > >> > >> This patch has been posted some moons again, and we had a discussion > >> about the impact on the existing users. At that time, MSI-X refactoring > >> for KVM support was not yet merged. Now it is, and it should be better > >> much clearer that vector usage tracking has no business with that > >> feature. > >> > >> hw/ivshmem.c | 20 ------------------- > >> hw/megasas.c | 4 --- > >> hw/msix.c | 57 > >> ++++++++++++------------------------------------------ > >> hw/msix.h | 5 +-- > >> hw/pci.h | 2 - > >> hw/virtio-pci.c | 20 +++++++----------- > >> 6 files changed, 23 insertions(+), 85 deletions(-) > >> > >> diff --git a/hw/ivshmem.c b/hw/ivshmem.c > >> index 47f2a16..5ffff48 100644 > >> --- a/hw/ivshmem.c > >> +++ b/hw/ivshmem.c > >> @@ -523,28 +523,11 @@ static void ivshmem_read(void *opaque, const uint8_t > >> * buf, int flags) > >> return; > >> } > >> > >> -/* Select the MSI-X vectors used by device. > >> - * ivshmem maps events to vectors statically, so > >> - * we just enable all vectors on init and after reset. */ > >> -static void ivshmem_use_msix(IVShmemState * s) > >> -{ > >> - int i; > >> - > >> - if (!msix_present(&s->dev)) { > >> - return; > >> - } > >> - > >> - for (i = 0; i < s->vectors; i++) { > >> - msix_vector_use(&s->dev, i); > >> - } > >> -} > >> - > >> static void ivshmem_reset(DeviceState *d) > >> { > >> IVShmemState *s = DO_UPCAST(IVShmemState, dev.qdev, d); > >> > >> s->intrstatus = 0; > >> - ivshmem_use_msix(s); > >> return; > >> } > >> > >> @@ -586,8 +569,6 @@ static void ivshmem_setup_msi(IVShmemState * s) > >> > >> /* allocate QEMU char devices for receiving interrupts */ > >> s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry)); > >> - > >> - ivshmem_use_msix(s); > >> } > >> > >> static void ivshmem_save(QEMUFile* f, void *opaque) > >> @@ -629,7 +610,6 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int > >> version_id) > >> > >> if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) { > >> msix_load(&proxy->dev, f); > >> - ivshmem_use_msix(proxy); > >> } else { > >> proxy->intrstatus = qemu_get_be32(f); > >> proxy->intrmask = qemu_get_be32(f); > >> diff --git a/hw/megasas.c b/hw/megasas.c > >> index c35a15d..4766117 100644 > >> --- a/hw/megasas.c > >> +++ b/hw/megasas.c > >> @@ -2121,10 +2121,6 @@ static int megasas_scsi_init(PCIDevice *dev) > >> pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_IO, &s->port_io); > >> pci_register_bar(&s->dev, 3, bar_type, &s->queue_io); > >> > >> - if (megasas_use_msix(s)) { > >> - msix_vector_use(&s->dev, 0); > >> - } > >> - > >> if (!s->sas_addr) { > >> s->sas_addr = ((NAA_LOCALLY_ASSIGNED_ID << 24) | > >> IEEE_COMPANY_LOCALLY_ASSIGNED) << 36; > >> diff --git a/hw/msix.c b/hw/msix.c > >> index aea340b..163ce4c 100644 > >> --- a/hw/msix.c > >> +++ b/hw/msix.c > >> @@ -273,7 +273,6 @@ int msix_init(struct PCIDevice *dev, unsigned short > >> nentries, > >> > >> dev->msix_table = g_malloc0(table_size); > >> dev->msix_pba = g_malloc0(pba_size); > >> - dev->msix_entry_used = g_malloc0(nentries * sizeof > >> *dev->msix_entry_used); > >> > >> msix_mask_all(dev, nentries); > >> > >> @@ -326,16 +325,6 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned > >> short nentries, > >> return 0; > >> } > >> > >> -static void msix_free_irq_entries(PCIDevice *dev) > >> -{ > >> - int vector; > >> - > >> - for (vector = 0; vector < dev->msix_entries_nr; ++vector) { > >> - dev->msix_entry_used[vector] = 0; > >> - msix_clr_pending(dev, vector); > >> - } > >> -} > >> - > >> /* Clean up resources for the device. */ > >> void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion > >> *pba_bar) > >> { > >> @@ -344,7 +333,6 @@ void msix_uninit(PCIDevice *dev, MemoryRegion > >> *table_bar, MemoryRegion *pba_bar) > >> } > >> pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); > >> dev->msix_cap = 0; > >> - msix_free_irq_entries(dev); > >> dev->msix_entries_nr = 0; > >> memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio); > >> memory_region_destroy(&dev->msix_pba_mmio); > >> @@ -354,8 +342,6 @@ void msix_uninit(PCIDevice *dev, MemoryRegion > >> *table_bar, MemoryRegion *pba_bar) > >> memory_region_destroy(&dev->msix_table_mmio); > >> g_free(dev->msix_table); > >> dev->msix_table = NULL; > >> - g_free(dev->msix_entry_used); > >> - dev->msix_entry_used = NULL; > >> dev->cap_present &= ~QEMU_PCI_CAP_MSIX; > >> return; > >> } > >> @@ -390,7 +376,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f) > >> return; > >> } > >> > >> - msix_free_irq_entries(dev); > >> qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE); > >> qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8); > >> msix_update_function_masked(dev); > >> @@ -419,8 +404,9 @@ void msix_notify(PCIDevice *dev, unsigned vector) > >> { > >> MSIMessage msg; > >> > >> - if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) > >> + if (vector >= dev->msix_entries_nr) { > >> return; > >> + } > >> if (msix_is_masked(dev, vector)) { > >> msix_set_pending(dev, vector); > >> return; > >> @@ -436,7 +422,7 @@ void msix_reset(PCIDevice *dev) > >> if (!msix_present(dev)) { > >> return; > >> } > >> - msix_free_irq_entries(dev); > >> + msix_clear_all_vectors(dev); > >> dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &= > >> ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET]; > >> memset(dev->msix_table, 0, dev->msix_entries_nr * > >> PCI_MSIX_ENTRY_SIZE); > >> @@ -444,41 +430,24 @@ void msix_reset(PCIDevice *dev) > >> msix_mask_all(dev, dev->msix_entries_nr); > >> } > >> > >> -/* PCI spec suggests that devices make it possible for software to > >> configure > >> - * less vectors than supported by the device, but does not specify a > >> standard > >> - * mechanism for devices to do so. > >> - * > >> - * We support this by asking devices to declare vectors software is going > >> to > >> - * actually use, and checking this on the notification path. Devices that > >> - * don't want to follow the spec suggestion can declare all vectors as > >> used. */ > >> - > >> -/* Mark vector as used. */ > >> -int msix_vector_use(PCIDevice *dev, unsigned vector) > >> +/* Clear pending vector. */ > >> +void msix_clear_vector(PCIDevice *dev, unsigned vector) > >> { > >> - if (vector >= dev->msix_entries_nr) > >> - return -EINVAL; > >> - dev->msix_entry_used[vector]++; > >> - return 0; > >> -} > >> - > > > > I keep thinking we should instead call vector notifier > > here, so that virtio can detect and handle cases where > > kvm_virtio_pci_vq_vector_use fails. > > ATM it just asserts. > > > > Maybe I am wrong. > > > > Let's delay this decision until 1.3. > > Yep, that feature can be added on top of this patch in 1.3, but we > should still remove the usage tracking to fix its issues. There is no > feature regression related to this. > > Jan > >
I'd like to keep it for virtio and remove for everyone else. Draft patch sent, it's weekend here so consider it pseudo-code, pls take a look. -- MST