> This is the wrong direction, as it does not allow initial affinity
> assignment for the non-managed interrupts at allocation time, and
> that's what Kashyap and Sumit are looking for.
>
> The trivial fix for the possible breakage when irq_default_affinity !=
> cpu_possible_mask is to set the affinity for the pre/post vectors to
> cpu_possible_mask and be done with it.
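
For reference, that fix would presumably land in irq_create_affinity_masks()
in kernel/irq/affinity.c, where the pre/post vectors are currently filled
with irq_default_affinity. A rough, untested sketch of the idea (mine, not
part of the patch below):

	/*
	 * Sketch: fill the non-managed pre/post vectors with
	 * cpu_possible_mask instead of irq_default_affinity, so the
	 * possible breakage goes away and they stay freely migratable.
	 */
	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
		cpumask_copy(masks + curvec, cpu_possible_mask);

	/* ... managed vectors are spread over the CPUs in between ... */

	for (curvec = nvecs - affd->post_vectors; curvec < nvecs; curvec++)
		cpumask_copy(masks + curvec, cpu_possible_mask);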
>
> One other thing I noticed while staring at that is the fact that the
> PCI code does not care about the return value of
> irq_create_affinity_masks() at all. It just proceeds when masks is
> NULL as if the interrupts had been requested with a NULL affinity
> descriptor pointer. I don't think that's a brilliant idea, because
> drivers might rely on the interrupts being managed, but it might be a
> non-issue and just result in bad locality.
>
> Christoph?
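
To make that concrete: msi_setup_entry() currently just does

	if (affd)
		masks = irq_create_affinity_masks(nvec, affd);

and carries on when masks comes back NULL. If that failure should be
fatal rather than silently degrading to unmanaged interrupts, one
hypothetical policy (my sketch, not from the patch) would be:

	if (affd) {
		masks = irq_create_affinity_masks(nvec, affd);
		/* Hypothetical: propagate the allocation failure. */
		if (!masks)
			return NULL;
	}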
>
> So back to the problem at hand. We need to change the affinity
> management scheme in a way which lets us differentiate between managed
> and unmanaged interrupts, so that it allows us to automatically assign
> (initial) affinity to all of them.
>
> Right now everything is bound to the cpumasks array, which does not
> allow conveying more information. So we need to come up with something
> different.
>
> Something like the below (does not compile and is just for reference)
> should do the trick. I'm not sure whether we want to convey the
> information (masks and bitmap) in a single data structure or not, but
> that's an implementation detail.
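
If they were combined, a single per-vector structure could carry the
managed property next to the mask, along these lines (purely illustrative,
the name is invented here):

	struct irq_affinity_desc {
		struct cpumask	mask;		/* spread affinity */
		unsigned int	is_managed : 1;	/* managed vs. unmanaged */
	};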


Dou - Do you want me to test your patch, or shall I wait for the next
version?

>
> Thanks,
>
>       tglx
>
> 8<---------
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -535,15 +535,16 @@ static struct msi_desc *
>  msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd)
>  {
>       struct cpumask *masks = NULL;
> +     unsigned long managed = 0;
>       struct msi_desc *entry;
>       u16 control;
>
>       if (affd)
> -             masks = irq_create_affinity_masks(nvec, affd);
> +             masks = irq_create_affinity_masks(nvec, affd, &managed);
>
>
>       /* MSI Entry Initialization */
> -     entry = alloc_msi_entry(&dev->dev, nvec, masks);
> +     entry = alloc_msi_entry(&dev->dev, nvec, masks, managed);
>       if (!entry)
>               goto out;
>
> @@ -672,15 +673,22 @@ static int msix_setup_entries(struct pci
>                             struct msix_entry *entries, int nvec,
>                             const struct irq_affinity *affd)
>  {
> +     /*
> +      * MSIX_MAX_VECTORS = 2048, i.e. 256 bytes. Might need runtime
> +      * allocation. OTOH, are 2048 vectors realistic?
> +      */
> +     DECLARE_BITMAP(managed, MSIX_MAX_VECTORS);
>       struct cpumask *curmsk, *masks = NULL;
>       struct msi_desc *entry;
>       int ret, i;
>
>       if (affd)
> -             masks = irq_create_affinity_masks(nvec, affd);
> +             masks = irq_create_affinity_masks(nvec, affd, managed);
>
>       for (i = 0, curmsk = masks; i < nvec; i++) {
> -             entry = alloc_msi_entry(&dev->dev, 1, curmsk);
> +             unsigned long m = test_bit(i, managed) ? 1 : 0;
> +
> +             entry = alloc_msi_entry(&dev->dev, 1, curmsk, m);
>               if (!entry) {
>                       if (!i)
>                               iounmap(base);
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -27,7 +27,8 @@
>   * and the affinity masks from @affinity are copied.
>   */
>  struct msi_desc *
> -alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity)
> +alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity,
> +             unsigned long managed)
>  {
>       struct msi_desc *desc;
>
> @@ -38,6 +39,7 @@ alloc_msi_entry(struct device *dev, int
>       INIT_LIST_HEAD(&desc->list);
>       desc->dev = dev;
>       desc->nvec_used = nvec;
> +     desc->managed = managed;
>       if (affinity) {
>               desc->affinity = kmemdup(affinity,
>                       nvec * sizeof(*desc->affinity), GFP_KERNEL);
> @@ -416,7 +418,7 @@ int msi_domain_alloc_irqs(struct irq_dom
>
>               virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
>                                              dev_to_node(dev), &arg, false,
> -                                            desc->affinity);
> +                                            desc->affinity, desc->managed);
>               if (virq < 0) {
>                       ret = -ENOSPC;
>                       if (ops->handle_error)
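
The irq_create_affinity_masks() side is not shown above; presumably it
would mark the spread (middle) vectors in the new bitmap roughly like this
(my guess at the missing piece, not from the patch):

	/*
	 * Pre and post vectors stay unmanaged; the vectors spread
	 * across the CPUs in between form the managed set.
	 */
	for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
		set_bit(i, managed);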
