From: Nam Cao <[email protected]> Sent: Thursday, June 26, 2025 7:48 AM
> 
> Move away from the legacy MSI domain setup, switch to use
> msi_create_parent_irq_domain().

From a build standpoint, this patch does not apply cleanly to
linux-next-20250630. See also the issue noted below, where a needed
irq function isn't exported.

At runtime, I've done basic smoke testing on an x86 VM in the Azure
cloud that has a Mellanox NIC VF and two NVMe devices as PCI devices.
So far everything looks good. But I'm still doing additional testing, and
I want to also test on an ARM64 VM. Please give me another day or two
to be completely satisfied.

Michael Kelley

> 
> Signed-off-by: Nam Cao <[email protected]>
> ---
> Cc: K. Y. Srinivasan <[email protected]>
> Cc: Haiyang Zhang <[email protected]>
> Cc: Wei Liu <[email protected]>
> Cc: Dexuan Cui <[email protected]>
> Cc: [email protected]
> ---
>  drivers/pci/Kconfig                 |  1 +
>  drivers/pci/controller/pci-hyperv.c | 98 +++++++++++++++++++++++------
>  2 files changed, 80 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
> index 9c0e4aaf4e8cb..9a249c65aedcd 100644
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -223,6 +223,7 @@ config PCI_HYPERV
>       tristate "Hyper-V PCI Frontend"
>       depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && SYSFS
>       select PCI_HYPERV_INTERFACE
> +     select IRQ_MSI_LIB
>       help
>         The PCI device frontend driver allows the kernel to import arbitrary
>         PCI devices from a PCI backend to support PCI driver domains.
> diff --git a/drivers/pci/controller/pci-hyperv.c 
> b/drivers/pci/controller/pci-hyperv.c
> index ef5d655a0052c..3a24fadddb83b 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -44,6 +44,7 @@
>  #include <linux/delay.h>
>  #include <linux/semaphore.h>
>  #include <linux/irq.h>
> +#include <linux/irqchip/irq-msi-lib.h>
>  #include <linux/msi.h>
>  #include <linux/hyperv.h>
>  #include <linux/refcount.h>
> @@ -508,7 +509,6 @@ struct hv_pcibus_device {
>       struct list_head children;
>       struct list_head dr_list;
> 
> -     struct msi_domain_info msi_info;
>       struct irq_domain *irq_domain;
> 
>       struct workqueue_struct *wq;
> @@ -1687,7 +1687,7 @@ static void hv_msi_free(struct irq_domain *domain, 
> struct msi_domain_info *info,
>       struct msi_desc *msi = irq_data_get_msi_desc(irq_data);
> 
>       pdev = msi_desc_to_pci_dev(msi);
> -     hbus = info->data;
> +     hbus = domain->host_data;
>       int_desc = irq_data_get_irq_chip_data(irq_data);
>       if (!int_desc)
>               return;
> @@ -1705,7 +1705,6 @@ static void hv_msi_free(struct irq_domain *domain, 
> struct msi_domain_info *info,
> 
>  static void hv_irq_mask(struct irq_data *data)
>  {
> -     pci_msi_mask_irq(data);
>       if (data->parent_data->chip->irq_mask)
>               irq_chip_mask_parent(data);
>  }
> @@ -1716,7 +1715,6 @@ static void hv_irq_unmask(struct irq_data *data)
> 
>       if (data->parent_data->chip->irq_unmask)
>               irq_chip_unmask_parent(data);
> -     pci_msi_unmask_irq(data);
>  }
> 
>  struct compose_comp_ctxt {
> @@ -2101,6 +2099,44 @@ static void hv_compose_msi_msg(struct irq_data *data, 
> struct msi_msg *msg)
>       msg->data = 0;
>  }
> 
> +static bool hv_pcie_init_dev_msi_info(struct device *dev, struct irq_domain 
> *domain,
> +                                   struct irq_domain *real_parent, struct 
> msi_domain_info *info)
> +{
> +     struct irq_chip *chip = info->chip;
> +
> +     if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
> +             return false;
> +
> +     info->ops->msi_prepare = hv_msi_prepare;
> +
> +     chip->irq_set_affinity = irq_chip_set_affinity_parent;
> +
> +     if (IS_ENABLED(CONFIG_X86))
> +             chip->flags |= IRQCHIP_MOVE_DEFERRED;
> +
> +     return true;
> +}
> +
> +#define HV_PCIE_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS            | \
> +                                 MSI_FLAG_USE_DEF_CHIP_OPS           | \
> +                                 MSI_FLAG_PCI_MSI_MASK_PARENT)
> +#define HV_PCIE_MSI_FLAGS_SUPPORTED (MSI_FLAG_MULTI_PCI_MSI             | \
> +                                  MSI_FLAG_PCI_MSIX                  | \
> +                                  MSI_GENERIC_FLAGS_MASK)
> +
> +static const struct msi_parent_ops hv_pcie_msi_parent_ops = {
> +     .required_flags         = HV_PCIE_MSI_FLAGS_REQUIRED,
> +     .supported_flags        = HV_PCIE_MSI_FLAGS_SUPPORTED,
> +     .bus_select_token       = DOMAIN_BUS_PCI_MSI,
> +#ifdef CONFIG_X86
> +     .chip_flags             = MSI_CHIP_FLAG_SET_ACK,
> +#elif defined(CONFIG_ARM64)
> +     .chip_flags             = MSI_CHIP_FLAG_SET_EOI,
> +#endif
> +     .prefix                 = "HV-",
> +     .init_dev_msi_info      = hv_pcie_init_dev_msi_info,
> +};
> +
>  /* HW Interrupt Chip Descriptor */
>  static struct irq_chip hv_msi_irq_chip = {
>       .name                   = "Hyper-V PCIe MSI",
> @@ -2108,7 +2144,6 @@ static struct irq_chip hv_msi_irq_chip = {
>       .irq_set_affinity       = irq_chip_set_affinity_parent,
>  #ifdef CONFIG_X86
>       .irq_ack                = irq_chip_ack_parent,
> -     .flags                  = IRQCHIP_MOVE_DEFERRED,
>  #elif defined(CONFIG_ARM64)
>       .irq_eoi                = irq_chip_eoi_parent,
>  #endif
> @@ -2116,9 +2151,37 @@ static struct irq_chip hv_msi_irq_chip = {
>       .irq_unmask             = hv_irq_unmask,
>  };
> 
> -static struct msi_domain_ops hv_msi_ops = {
> -     .msi_prepare    = hv_msi_prepare,
> -     .msi_free       = hv_msi_free,
> +static int hv_pcie_domain_alloc(struct irq_domain *d, unsigned int virq, 
> unsigned int nr_irqs,
> +                            void *arg)
> +{
> +     /* TODO: move the content of hv_compose_msi_msg() in here */
> +     int ret;
> +
> +     ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg);
> +     if (ret < 0)
> +             return ret;
> +
> +     for (int i = 0; i < nr_irqs; i++) {
> +             irq_domain_set_info(d, virq + i, 0, &hv_msi_irq_chip, NULL, 
> FLOW_HANDLER, NULL,
> +                                 FLOW_NAME);
> +     }
> +
> +     return 0;
> +}
> +
> +static void hv_pcie_domain_free(struct irq_domain *d, unsigned int virq, 
> unsigned int nr_irqs)
> +{
> +     struct msi_domain_info *info = d->host_data;
> +
> +     for (int i = 0; i < nr_irqs; i++)
> +             hv_msi_free(d, info, virq + i);
> +
> +     irq_domain_free_irqs_top(d, virq, nr_irqs);

This code can be built as a module, so irq_domain_free_irqs_top() needs to be
exported, which it currently is not.

> +}
> +
> +static const struct irq_domain_ops hv_pcie_domain_ops = {
> +     .alloc  = hv_pcie_domain_alloc,
> +     .free   = hv_pcie_domain_free,
>  };
> 
>  /**
> @@ -2136,17 +2199,14 @@ static struct msi_domain_ops hv_msi_ops = {
>   */
>  static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
>  {
> -     hbus->msi_info.chip = &hv_msi_irq_chip;
> -     hbus->msi_info.ops = &hv_msi_ops;
> -     hbus->msi_info.flags = (MSI_FLAG_USE_DEF_DOM_OPS |
> -             MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI |
> -             MSI_FLAG_PCI_MSIX);
> -     hbus->msi_info.handler = FLOW_HANDLER;
> -     hbus->msi_info.handler_name = FLOW_NAME;
> -     hbus->msi_info.data = hbus;
> -     hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode,
> -                                                  &hbus->msi_info,
> -                                                  hv_pci_get_root_domain());
> +     struct irq_domain_info info = {
> +             .fwnode         = hbus->fwnode,
> +             .ops            = &hv_pcie_domain_ops,
> +             .host_data      = hbus,
> +             .parent         = hv_pci_get_root_domain(),
> +     };
> +
> +     hbus->irq_domain = msi_create_parent_irq_domain(&info, 
> &hv_pcie_msi_parent_ops);
>       if (!hbus->irq_domain) {
>               dev_err(&hbus->hdev->device,
>                       "Failed to build an MSI IRQ domain\n");
> --
> 2.39.5
> 


Reply via email to