On 12/2/20 1:52 AM, Alexey Kardashevskiy wrote: > From: Oliver O'Halloran <ooh...@gmail.com> > > When a passthrough IO adapter is removed from a pseries machine using hash > MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS > to clear all page table entries related to the adapter. If some are still > present, the RTAS call which isolates the PCI slot returns error 9001 > "valid outstanding translations" and the removal of the IO adapter fails. > This is because when the PHBs are scanned, Linux automatically maps the > INTx interrupts into the Linux interrupt number space, but these mappings are never > removed. > > This problem can be fixed by adding the corresponding unmap operation when > the device is removed. There's no pcibios_* hook for the remove case, but > the same effect can be achieved using a bus notifier. > > Because INTx are shared among PHBs (and potentially across the system), > this adds tracking of virq to unmap them only when the last user is gone. > > Signed-off-by: Oliver O'Halloran <ooh...@gmail.com> > [aik: added refcounter] > Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
Reviewed-by: Cédric Le Goater <c...@kaod.org> I did some PHB hotplug tests on a KVM guest and an LPAR using only LSIs. Tested-by: Cédric Le Goater <c...@kaod.org> Thanks Alexey, C. > --- > Changes: > v3: > * free @vi on error path > > v2: > * added refcounter > --- > arch/powerpc/kernel/pci-common.c | 82 ++++++++++++++++++++++++++++++-- > 1 file changed, 78 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/kernel/pci-common.c > b/arch/powerpc/kernel/pci-common.c > index be108616a721..2b555997b295 100644 > --- a/arch/powerpc/kernel/pci-common.c > +++ b/arch/powerpc/kernel/pci-common.c > @@ -353,6 +353,55 @@ struct pci_controller > *pci_find_controller_for_domain(int domain_nr) > return NULL; > } > > +struct pci_intx_virq { > + int virq; > + struct kref kref; > + struct list_head list_node; > +}; > + > +static LIST_HEAD(intx_list); > +static DEFINE_MUTEX(intx_mutex); > + > +static void ppc_pci_intx_release(struct kref *kref) > +{ > + struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, > kref); > + > + list_del(&vi->list_node); > + irq_dispose_mapping(vi->virq); > + kfree(vi); > +} > + > +static int ppc_pci_unmap_irq_line(struct notifier_block *nb, > + unsigned long action, void *data) > +{ > + struct pci_dev *pdev = to_pci_dev(data); > + > + if (action == BUS_NOTIFY_DEL_DEVICE) { > + struct pci_intx_virq *vi; > + > + mutex_lock(&intx_mutex); > + list_for_each_entry(vi, &intx_list, list_node) { > + if (vi->virq == pdev->irq) { > + kref_put(&vi->kref, ppc_pci_intx_release); > + break; > + } > + } > + mutex_unlock(&intx_mutex); > + } > + > + return NOTIFY_DONE; > +} > + > +static struct notifier_block ppc_pci_unmap_irq_notifier = { > + .notifier_call = ppc_pci_unmap_irq_line, > +}; > + > +static int ppc_pci_register_irq_notifier(void) > +{ > + return bus_register_notifier(&pci_bus_type, > &ppc_pci_unmap_irq_notifier); > +} > +arch_initcall(ppc_pci_register_irq_notifier); > + > /* > * Reads the interrupt pin to determine if interrupt is use by 
card. > * If the interrupt is used, then gets the interrupt line from the > @@ -361,6 +410,12 @@ struct pci_controller > *pci_find_controller_for_domain(int domain_nr) > static int pci_read_irq_line(struct pci_dev *pci_dev) > { > int virq; > + struct pci_intx_virq *vi, *vitmp; > + > + /* Preallocate vi as rewind is complex if this fails after mapping */ > + vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL); > + if (!vi) > + return -1; > > pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); > > @@ -377,12 +432,12 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) > * function. > */ > if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin)) > - return -1; > + goto error_exit; > if (pin == 0) > - return -1; > + goto error_exit; > if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) || > line == 0xff || line == 0) { > - return -1; > + goto error_exit; > } > pr_debug(" No map ! Using line %d (pin %d) from PCI config\n", > line, pin); > @@ -394,14 +449,33 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) > > if (!virq) { > pr_debug(" Failed to map !\n"); > - return -1; > + goto error_exit; > } > > pr_debug(" Mapped to linux irq %d\n", virq); > > pci_dev->irq = virq; > > + mutex_lock(&intx_mutex); > + list_for_each_entry(vitmp, &intx_list, list_node) { > + if (vitmp->virq == virq) { > + kref_get(&vitmp->kref); > + kfree(vi); > + vi = NULL; > + break; > + } > + } > + if (vi) { > + vi->virq = virq; > + kref_init(&vi->kref); > + list_add_tail(&vi->list_node, &intx_list); > + } > + mutex_unlock(&intx_mutex); > + > return 0; > +error_exit: > + kfree(vi); > + return -1; > } > > /* >