On 12/2/20 1:52 AM, Alexey Kardashevskiy wrote:
> From: Oliver O'Halloran <ooh...@gmail.com>
> 
> When a passthrough IO adapter is removed from a pseries machine using hash
> MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS
> to clear all page table entries related to the adapter. If some are still
> present, the RTAS call which isolates the PCI slot returns error 9001
> "valid outstanding translations" and the removal of the IO adapter fails.
> This is because when the PHBs are scanned, Linux automatically maps the
> INTx interrupts into the Linux interrupt number space, but these mappings
> are never removed.
> 
> This problem can be fixed by adding the corresponding unmap operation when
> the device is removed. There's no pcibios_* hook for the remove case, but
> the same effect can be achieved using a bus notifier.
> 
> Because INTx are shared among PHBs (and potentially across the system),
> this adds tracking of virq to unmap them only when the last user is gone.
> 
> Signed-off-by: Oliver O'Halloran <ooh...@gmail.com>
> [aik: added refcounter]
> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>


Reviewed-by: Cédric Le Goater <c...@kaod.org>

I did some PHB hotplug tests on a KVM guest and an LPAR using only LSIs.

Tested-by: Cédric Le Goater <c...@kaod.org>

Thanks Alexey,

C.

> ---
> Changes:
> v3:
> * free @vi on error path
> 
> v2:
> * added refcounter
> ---
>  arch/powerpc/kernel/pci-common.c | 82 ++++++++++++++++++++++++++++++--
>  1 file changed, 78 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
> index be108616a721..2b555997b295 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -353,6 +353,55 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
>       return NULL;
>  }
>  
> +struct pci_intx_virq {
> +     int virq;
> +     struct kref kref;
> +     struct list_head list_node;
> +};
> +
> +static LIST_HEAD(intx_list);
> +static DEFINE_MUTEX(intx_mutex);
> +
> +static void ppc_pci_intx_release(struct kref *kref)
> +{
> +     struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, kref);
> +
> +     list_del(&vi->list_node);
> +     irq_dispose_mapping(vi->virq);
> +     kfree(vi);
> +}
> +
> +static int ppc_pci_unmap_irq_line(struct notifier_block *nb,
> +                            unsigned long action, void *data)
> +{
> +     struct pci_dev *pdev = to_pci_dev(data);
> +
> +     if (action == BUS_NOTIFY_DEL_DEVICE) {
> +             struct pci_intx_virq *vi;
> +
> +             mutex_lock(&intx_mutex);
> +             list_for_each_entry(vi, &intx_list, list_node) {
> +                     if (vi->virq == pdev->irq) {
> +                             kref_put(&vi->kref, ppc_pci_intx_release);
> +                             break;
> +                     }
> +             }
> +             mutex_unlock(&intx_mutex);
> +     }
> +
> +     return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block ppc_pci_unmap_irq_notifier = {
> +     .notifier_call = ppc_pci_unmap_irq_line,
> +};
> +
> +static int ppc_pci_register_irq_notifier(void)
> +{
> +     return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier);
> +}
> +arch_initcall(ppc_pci_register_irq_notifier);
> +
>  /*
>   * Reads the interrupt pin to determine if interrupt is use by card.
>   * If the interrupt is used, then gets the interrupt line from the
> @@ -361,6 +410,12 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr)
>  static int pci_read_irq_line(struct pci_dev *pci_dev)
>  {
>       int virq;
> +     struct pci_intx_virq *vi, *vitmp;
> +
> +     /* Preallocate vi as rewind is complex if this fails after mapping */
> +     vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL);
> +     if (!vi)
> +             return -1;
>  
>       pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
>  
> @@ -377,12 +432,12 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
>                * function.
>                */
>               if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin))
> -                     return -1;
> +                     goto error_exit;
>               if (pin == 0)
> -                     return -1;
> +                     goto error_exit;
>               if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) ||
>                   line == 0xff || line == 0) {
> -                     return -1;
> +                     goto error_exit;
>               }
>               pr_debug(" No map ! Using line %d (pin %d) from PCI config\n",
>                        line, pin);
> @@ -394,14 +449,33 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
>  
>       if (!virq) {
>               pr_debug(" Failed to map !\n");
> -             return -1;
> +             goto error_exit;
>       }
>  
>       pr_debug(" Mapped to linux irq %d\n", virq);
>  
>       pci_dev->irq = virq;
>  
> +     mutex_lock(&intx_mutex);
> +     list_for_each_entry(vitmp, &intx_list, list_node) {
> +             if (vitmp->virq == virq) {
> +                     kref_get(&vitmp->kref);
> +                     kfree(vi);
> +                     vi = NULL;
> +                     break;
> +             }
> +     }
> +     if (vi) {
> +             vi->virq = virq;
> +             kref_init(&vi->kref);
> +             list_add_tail(&vi->list_node, &intx_list);
> +     }
> +     mutex_unlock(&intx_mutex);
> +
>       return 0;
> +error_exit:
> +     kfree(vi);
> +     return -1;
>  }
>  
>  /*
> 

Reply via email to