On 12/6/25 8:08 PM, Nam Cao wrote:
> Nilay Shroff <[email protected]> writes:
>> Yes you can find the architecture document here:
>> https://github.com/linuxppc/public-docs/blob/main/LoPAPR/LoPAR-20200812.pdf
>>
>> You may refer to section 7 in the above document, which describes the RTAS API.
>
> Thank you, that helped a lot.
>
> Can you please confirm that the below diff fixes the problem? It brings
> back the "fallback" thing that you mentioned.
>
> Best regards,
> Nam
>
> diff --git a/arch/powerpc/platforms/pseries/msi.c
> b/arch/powerpc/platforms/pseries/msi.c
> index a82aaa786e9e..8898a968a59b 100644
> --- a/arch/powerpc/platforms/pseries/msi.c
> +++ b/arch/powerpc/platforms/pseries/msi.c
> @@ -19,6 +19,11 @@
>
> #include "pseries.h"
>
> +struct pseries_msi_device {
> + unsigned int msi_quota;
> + unsigned int msi_used;
> +};
> +
> static int query_token, change_token;
>
> #define RTAS_QUERY_FN 0
> @@ -433,8 +438,26 @@ static int pseries_msi_ops_prepare(struct irq_domain
> *domain, struct device *dev
> struct msi_domain_info *info = domain->host_data;
> struct pci_dev *pdev = to_pci_dev(dev);
> int type = (info->flags & MSI_FLAG_PCI_MSIX) ? PCI_CAP_ID_MSIX :
> PCI_CAP_ID_MSI;
> + int ret;
> +
> + struct pseries_msi_device *pseries_dev __free(kfree)
> + = kmalloc(sizeof(*pseries_dev), GFP_KERNEL);
> + if (!pseries_dev)
> + return -ENOMEM;
> +
> + ret = rtas_prepare_msi_irqs(pdev, nvec, type, arg);
> + if (ret > 0) {
> + nvec = ret;
> + ret = rtas_prepare_msi_irqs(pdev, nvec, type, arg);
> + }
> + if (ret < 0)
> + return ret;
>
> - return rtas_prepare_msi_irqs(pdev, nvec, type, arg);
> + pseries_dev->msi_quota = nvec;
> + pseries_dev->msi_used = 0;
> +
> + arg->scratchpad[0].ptr = no_free_ptr(pseries_dev);
> + return 0;
> }
>
> /*
> @@ -443,9 +466,13 @@ static int pseries_msi_ops_prepare(struct irq_domain
> *domain, struct device *dev
> */
> static void pseries_msi_ops_teardown(struct irq_domain *domain,
> msi_alloc_info_t *arg)
> {
> + struct pseries_msi_device *pseries_dev = arg->scratchpad[0].ptr;
> struct pci_dev *pdev = to_pci_dev(domain->dev);
>
> rtas_disable_msi(pdev);
> +
> + WARN_ON(pseries_dev->msi_used);
> + kfree(pseries_dev);
> }
>
> static void pseries_msi_shutdown(struct irq_data *d)
> @@ -546,12 +573,18 @@ static int pseries_irq_domain_alloc(struct irq_domain
> *domain, unsigned int virq
> unsigned int nr_irqs, void *arg)
> {
> struct pci_controller *phb = domain->host_data;
> + struct pseries_msi_device *pseries_dev;
> msi_alloc_info_t *info = arg;
> struct msi_desc *desc = info->desc;
> struct pci_dev *pdev = msi_desc_to_pci_dev(desc);
> int hwirq;
> int i, ret;
>
> + pseries_dev = info->scratchpad[0].ptr;
> +
> + if (pseries_dev->msi_used + nr_irqs > pseries_dev->msi_quota)
> + return -ENOSPC;
> +
> hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_index);
> if (hwirq < 0) {
> dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq);
> @@ -567,9 +600,10 @@ static int pseries_irq_domain_alloc(struct irq_domain
> *domain, unsigned int virq
> goto out;
>
> irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
> - &pseries_msi_irq_chip,
> domain->host_data);
> + &pseries_msi_irq_chip,
> pseries_dev);
> }
>
> + pseries_dev->msi_used++;
> return 0;
>
> out:
> @@ -582,9 +616,11 @@ static void pseries_irq_domain_free(struct irq_domain
> *domain, unsigned int virq
> unsigned int nr_irqs)
> {
> struct irq_data *d = irq_domain_get_irq_data(domain, virq);
> - struct pci_controller *phb = irq_data_get_irq_chip_data(d);
> + struct pseries_msi_device *pseries_dev = irq_data_get_irq_chip_data(d);
> + struct pci_controller *phb = domain->host_data;
>
> pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs);
> + pseries_dev->msi_used -= nr_irqs;
> irq_domain_free_irqs_parent(domain, virq, nr_irqs);
> }
>
Thanks for the patch! I tested it on my system and confirmed that
this patch fixes the bug reported earlier. That said, if you're
planning to send a formal patch upstream with the above change, then
please feel free to add:
Acked-by: Nilay Shroff <[email protected]>