On Fri, Jan 17, 2025 at 05:33:32PM -0700, Ahmed Zaki wrote:
> A common task for most drivers is to remember the user-set CPU affinity
> of their IRQs. On each netdev reset, the driver should re-assign the
> user's settings to the IRQs.
> 
> Add a CPU affinity mask to napi_config. To delegate CPU affinity
> management to the core, drivers must:
>  1 - set the new netdev flag "irq_affinity_auto":
>                                        netif_enable_irq_affinity(netdev)
>  2 - create the napi with persistent config:
>                                        netif_napi_add_config()
>  3 - bind an IRQ to the napi instance: netif_napi_set_irq()
> 
> The core will then re-assign the user's affinity to the napi's IRQ on
> every netdev reset.
> 
> The default IRQ affinity mask is a single CPU, chosen starting from the
> closest NUMA node.

Maybe the above would be helpful to add to
Documentation/networking/napi.rst?
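
For reference, here is a minimal sketch of how a driver would opt in.
The my_probe_queues()/my_adapter/my_queue/my_poll names and the loop
bounds are made up for illustration, and error handling is omitted:

/* Hypothetical driver setup path -- illustrative only. */
static int my_probe_queues(struct my_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	/* 1 - let the core manage IRQ affinity for this netdev */
	netif_enable_irq_affinity(netdev);

	for (i = 0; i < adapter->num_queues; i++) {
		struct my_queue *q = &adapter->queues[i];

		/* 2 - napi with persistent, index-keyed config */
		netif_napi_add_config(netdev, &q->napi, my_poll, i);

		/* 3 - bind the queue's IRQ to this napi instance */
		netif_napi_set_irq(&q->napi, q->irq);
	}

	return 0;
}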

> Signed-off-by: Ahmed Zaki <ahmed.z...@intel.com>
> ---
>  include/linux/netdevice.h | 14 ++++++++++-
>  net/core/dev.c            | 51 +++++++++++++++++++++++++++++----------
>  2 files changed, 51 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 98259f19c627..d576e5c91c43 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -351,6 +351,7 @@ struct napi_config {
>       u64 gro_flush_timeout;
>       u64 irq_suspend_timeout;
>       u32 defer_hard_irqs;
> +     cpumask_t affinity_mask;
>       unsigned int napi_id;
>  };
>  
> @@ -393,8 +394,8 @@ struct napi_struct {
>       struct list_head        dev_list;
>       struct hlist_node       napi_hash_node;
>       int                     irq;
> -#ifdef CONFIG_RFS_ACCEL
>       struct irq_affinity_notify notify;
> +#ifdef CONFIG_RFS_ACCEL
>       int                     napi_rmap_idx;
>  #endif
>       int                     index;
> @@ -1991,6 +1992,11 @@ enum netdev_reg_state {
>   *
>   *   @threaded:      napi threaded mode is enabled
>   *
> + *   @irq_affinity_auto: driver wants the core to manage the IRQ affinity.
> + *                       Set by netif_enable_irq_affinity(), then driver must
> + *                       create persistent napi by netif_napi_add_config()
> + *                       and finally bind napi to IRQ (netif_napi_set_irq).
> + *
>   *   @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap.
>   *                      Set by calling netif_enable_cpu_rmap().
>   *
> @@ -2401,6 +2407,7 @@ struct net_device {
>       struct lock_class_key   *qdisc_tx_busylock;
>       bool                    proto_down;
>       bool                    threaded;
> +     bool                    irq_affinity_auto;
>       bool                    rx_cpu_rmap_auto;
>  
>       /* priv_flags_slow, ungrouped to save space */
> @@ -2653,6 +2660,11 @@ static inline void netdev_set_ml_priv(struct net_device *dev,
>       dev->ml_priv_type = type;
>  }
>  
> +static inline void netif_enable_irq_affinity(struct net_device *dev)
> +{
> +     dev->irq_affinity_auto = true;
> +}

I'll have to look at the patches which use the above function, but
the first thing that came to mind when I saw this was: does the above
write need a WRITE_ONCE()?

The reads below seem to be protected by a lock; I haven't yet looked
at the other patches, so maybe the write is also protected by
netdev->lock?
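
If the write does end up lockless, this is the kind of pairing I have
in mind (purely a sketch, not a claim about how this series actually
protects the flag):

static inline void netif_enable_irq_affinity(struct net_device *dev)
{
	/* Pairs with a READ_ONCE() at any reader not holding netdev->lock. */
	WRITE_ONCE(dev->irq_affinity_auto, true);
}

/* ...with a matching lockless reader doing e.g.:
 *	if (READ_ONCE(dev->irq_affinity_auto))
 *		...
 */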

>  /*
>   * Net namespace inlines
>   */
> diff --git a/net/core/dev.c b/net/core/dev.c
> index dbb63005bc2b..bc82c7f621b3 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c

[...]

>  
> +#ifdef CONFIG_RFS_ACCEL
>  static void netif_napi_affinity_release(struct kref *ref)
>  {
>       struct napi_struct *napi =
> @@ -6901,7 +6908,7 @@ static int napi_irq_cpu_rmap_add(struct napi_struct *napi, int irq)
>       if (!rmap)
>               return -EINVAL;
>  
> -     napi->notify.notify = netif_irq_cpu_rmap_notify;
> +     napi->notify.notify = netif_napi_irq_notify;

Same question as on the previous patch: does it make sense to set the
callbacks below only once all other operations have succeeded? (See the
sketch after the quoted code.)

>       napi->notify.release = netif_napi_affinity_release;
>       cpu_rmap_get(rmap);
>       rc = cpu_rmap_add(rmap, napi);
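
Concretely, something like this is what I mean -- a sketch of the
ordering only, with the error unwind (cpu_rmap_put() etc.) omitted:

	rc = cpu_rmap_add(rmap, napi);
	if (rc < 0)
		return rc;
	napi->napi_rmap_idx = rc;

	/* Publish the callbacks only once the rmap entry exists. */
	napi->notify.notify = netif_napi_irq_notify;
	napi->notify.release = netif_napi_affinity_release;
	cpu_rmap_get(rmap);

	return irq_set_affinity_notifier(irq, &napi->notify);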

[...]

> @@ -6976,23 +6987,28 @@ void netif_napi_set_irq_locked(struct napi_struct *napi, int irq)
>  {
>       int rc;
>  
> -     if (!napi->dev->rx_cpu_rmap_auto)
> -             goto out;
> -
> -     /* Remove existing rmap entries */
> -     if (napi->irq != irq && napi->irq > 0)
> +     /* Remove existing resources */
> +     if ((napi->dev->rx_cpu_rmap_auto || napi->dev->irq_affinity_auto) &&
> +         napi->irq > 0 && napi->irq != irq)
>               irq_set_affinity_notifier(napi->irq, NULL);
>  
> -     if (irq > 0) {
> +     if (irq > 0 && napi->dev->rx_cpu_rmap_auto) {
>               rc = napi_irq_cpu_rmap_add(napi, irq);
>               if (rc) {
>                       netdev_warn(napi->dev, "Unable to update ARFS map (%d)\n",
>                                   rc);
>                       netif_disable_cpu_rmap(napi->dev);
>               }
> +     } else if (irq > 0 && napi->config && napi->dev->irq_affinity_auto) {
> +             napi->notify.notify = netif_napi_irq_notify;
> +             napi->notify.release = netif_napi_affinity_release;
> +
> +             rc = irq_set_affinity_notifier(irq, &napi->notify);
> +             if (rc)
> +                     netdev_warn(napi->dev, "Unable to set IRQ notifier (%d)\n",
> +                                 rc);

I see now that my comments on the previous patch are stale after
this patch is applied. I wonder if the "irq > 0" part can be pulled
out to simplify the branches here?
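
For example (just a sketch of the restructuring, keeping the quoted
logic unchanged):

	if (irq > 0) {
		if (napi->dev->rx_cpu_rmap_auto) {
			rc = napi_irq_cpu_rmap_add(napi, irq);
			if (rc) {
				netdev_warn(napi->dev,
					    "Unable to update ARFS map (%d)\n", rc);
				netif_disable_cpu_rmap(napi->dev);
			}
		} else if (napi->config && napi->dev->irq_affinity_auto) {
			napi->notify.notify = netif_napi_irq_notify;
			napi->notify.release = netif_napi_affinity_release;

			rc = irq_set_affinity_notifier(irq, &napi->notify);
			if (rc)
				netdev_warn(napi->dev,
					    "Unable to set IRQ notifier (%d)\n", rc);
		}
	}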
