On 12/19/2017 05:24 PM, Saeed Mahameed wrote:
> Before the offending commit, mlx5 core did the IRQ affinity itself,
> and it seems that the new generic code have some drawbacks and one
> of them is the lack for user ability to modify irq affinity after
> the initial affinity values got assigned.
> 
> The issue is still being discussed and a solution in the new generic code
> is required, until then we need to revert this patch.
> 
> This fixes the following issue:
> echo <new affinity> > /proc/irq/<x>/smp_affinity
> fails with  -EIO
> 
> This reverts commit a435393acafbf0ecff4deb3e3cb554b34f0d0664.
> Note: kept mlx5_get_vector_affinity in include/linux/mlx5/driver.h since
> it is used in mlx5_ib driver.
> 
> Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code")
> Cc: Sagi Grimberg <s...@grimberg.me>
> Cc: Thomas Gleixner <t...@linutronix.de>
> Cc: Jes Sorensen <jsoren...@fb.com>
> Reported-by: Jes Sorensen <jsoren...@fb.com>
> Signed-off-by: Saeed Mahameed <sae...@mellanox.com>

Acked-by: Jes Sorensen <jsoren...@fb.com>

Cheers,
Jes


> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en.h      |  1 +
>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 45 +++++++-------
>  drivers/net/ethernet/mellanox/mlx5/core/main.c    | 75 
> +++++++++++++++++++++--
>  include/linux/mlx5/driver.h                       |  1 +
>  4 files changed, 93 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
> b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> index c0872b3284cb..43f9054830e5 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> @@ -590,6 +590,7 @@ struct mlx5e_channel {
>       struct mlx5_core_dev      *mdev;
>       struct hwtstamp_config    *tstamp;
>       int                        ix;
> +     int                        cpu;
>  };
>  
>  struct mlx5e_channels {
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
> b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index d2b057a3e512..cbec66bc82f1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -71,11 +71,6 @@ struct mlx5e_channel_param {
>       struct mlx5e_cq_param      icosq_cq;
>  };
>  
> -static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
> -{
> -     return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
> -}
> -
>  static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev 
> *mdev)
>  {
>       return MLX5_CAP_GEN(mdev, striding_rq) &&
> @@ -444,17 +439,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq 
> *rq,
>       int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
>       int mtt_sz = mlx5e_get_wqe_mtt_sz();
>       int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
> -     int node = mlx5e_get_node(c->priv, c->ix);
>       int i;
>  
>       rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
> -                                     GFP_KERNEL, node);
> +                                   GFP_KERNEL, cpu_to_node(c->cpu));
>       if (!rq->mpwqe.info)
>               goto err_out;
>  
>       /* We allocate more than mtt_sz as we will align the pointer */
> -     rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
> -                                     GFP_KERNEL, node);
> +     rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
> +                                     cpu_to_node(c->cpu));
>       if (unlikely(!rq->mpwqe.mtt_no_align))
>               goto err_free_wqe_info;
>  
> @@ -562,7 +556,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>       int err;
>       int i;
>  
> -     rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> +     rqp->wq.db_numa_node = cpu_to_node(c->cpu);
>  
>       err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
>                               &rq->wq_ctrl);
> @@ -629,8 +623,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>       default: /* MLX5_WQ_TYPE_LINKED_LIST */
>               rq->wqe.frag_info =
>                       kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
> -                                  GFP_KERNEL,
> -                                  mlx5e_get_node(c->priv, c->ix));
> +                                  GFP_KERNEL, cpu_to_node(c->cpu));
>               if (!rq->wqe.frag_info) {
>                       err = -ENOMEM;
>                       goto err_rq_wq_destroy;
> @@ -1000,13 +993,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
>       sq->uar_map   = mdev->mlx5e_res.bfreg.map;
>       sq->min_inline_mode = params->tx_min_inline_mode;
>  
> -     param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> +     param->wq.db_numa_node = cpu_to_node(c->cpu);
>       err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, 
> &sq->wq_ctrl);
>       if (err)
>               return err;
>       sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
>  
> -     err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
> +     err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
>       if (err)
>               goto err_sq_wq_destroy;
>  
> @@ -1053,13 +1046,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
>       sq->channel   = c;
>       sq->uar_map   = mdev->mlx5e_res.bfreg.map;
>  
> -     param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> +     param->wq.db_numa_node = cpu_to_node(c->cpu);
>       err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, 
> &sq->wq_ctrl);
>       if (err)
>               return err;
>       sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
>  
> -     err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
> +     err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
>       if (err)
>               goto err_sq_wq_destroy;
>  
> @@ -1126,13 +1119,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
>       if (MLX5_IPSEC_DEV(c->priv->mdev))
>               set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
>  
> -     param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
> +     param->wq.db_numa_node = cpu_to_node(c->cpu);
>       err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, 
> &sq->wq_ctrl);
>       if (err)
>               return err;
>       sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];
>  
> -     err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
> +     err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
>       if (err)
>               goto err_sq_wq_destroy;
>  
> @@ -1504,8 +1497,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
>       struct mlx5_core_dev *mdev = c->priv->mdev;
>       int err;
>  
> -     param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
> -     param->wq.db_numa_node  = mlx5e_get_node(c->priv, c->ix);
> +     param->wq.buf_numa_node = cpu_to_node(c->cpu);
> +     param->wq.db_numa_node  = cpu_to_node(c->cpu);
>       param->eq_ix   = c->ix;
>  
>       err = mlx5e_alloc_cq_common(mdev, param, cq);
> @@ -1604,6 +1597,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
>       mlx5e_free_cq(cq);
>  }
>  
> +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
> +{
> +     return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
> +}
> +
>  static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
>                            struct mlx5e_params *params,
>                            struct mlx5e_channel_param *cparam)
> @@ -1752,12 +1750,13 @@ static int mlx5e_open_channel(struct mlx5e_priv 
> *priv, int ix,
>  {
>       struct mlx5e_cq_moder icocq_moder = {0, 0};
>       struct net_device *netdev = priv->netdev;
> +     int cpu = mlx5e_get_cpu(priv, ix);
>       struct mlx5e_channel *c;
>       unsigned int irq;
>       int err;
>       int eqn;
>  
> -     c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
> +     c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
>       if (!c)
>               return -ENOMEM;
>  
> @@ -1765,6 +1764,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, 
> int ix,
>       c->mdev     = priv->mdev;
>       c->tstamp   = &priv->tstamp;
>       c->ix       = ix;
> +     c->cpu      = cpu;
>       c->pdev     = &priv->mdev->pdev->dev;
>       c->netdev   = priv->netdev;
>       c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
> @@ -1853,8 +1853,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel 
> *c)
>       for (tc = 0; tc < c->num_tc; tc++)
>               mlx5e_activate_txqsq(&c->sq[tc]);
>       mlx5e_activate_rq(&c->rq);
> -     netif_set_xps_queue(c->netdev,
> -             mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
> +     netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
>  }
>  
>  static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
> b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> index 5f323442cc5a..8a89c7e8cd63 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> @@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev 
> *dev)
>  {
>       struct mlx5_priv *priv = &dev->priv;
>       struct mlx5_eq_table *table = &priv->eq_table;
> -     struct irq_affinity irqdesc = {
> -             .pre_vectors = MLX5_EQ_VEC_COMP_BASE,
> -     };
>       int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
>       int nvec;
>  
> @@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev 
> *dev)
>       if (!priv->irq_info)
>               goto err_free_msix;
>  
> -     nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
> +     nvec = pci_alloc_irq_vectors(dev->pdev,
>                       MLX5_EQ_VEC_COMP_BASE + 1, nvec,
> -                     PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
> -                     &irqdesc);
> +                     PCI_IRQ_MSIX);
>       if (nvec < 0)
>               return nvec;
>  
> @@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
>       return (u64)timer_l | (u64)timer_h1 << 32;
>  }
>  
> +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
> +{
> +     struct mlx5_priv *priv  = &mdev->priv;
> +     int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
> +
> +     if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
> +             mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
> +             return -ENOMEM;
> +     }
> +
> +     cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
> +                     priv->irq_info[i].mask);
> +
> +     if (IS_ENABLED(CONFIG_SMP) &&
> +         irq_set_affinity_hint(irq, priv->irq_info[i].mask))
> +             mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 
> 0x%.4x", irq);
> +
> +     return 0;
> +}
> +
> +static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
> +{
> +     struct mlx5_priv *priv  = &mdev->priv;
> +     int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
> +
> +     irq_set_affinity_hint(irq, NULL);
> +     free_cpumask_var(priv->irq_info[i].mask);
> +}
> +
> +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
> +{
> +     int err;
> +     int i;
> +
> +     for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
> +             err = mlx5_irq_set_affinity_hint(mdev, i);
> +             if (err)
> +                     goto err_out;
> +     }
> +
> +     return 0;
> +
> +err_out:
> +     for (i--; i >= 0; i--)
> +             mlx5_irq_clear_affinity_hint(mdev, i);
> +
> +     return err;
> +}
> +
> +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
> +{
> +     int i;
> +
> +     for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
> +             mlx5_irq_clear_affinity_hint(mdev, i);
> +}
> +
>  int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
>                   unsigned int *irqn)
>  {
> @@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, 
> struct mlx5_priv *priv,
>               goto err_stop_eqs;
>       }
>  
> +     err = mlx5_irq_set_affinity_hints(dev);
> +     if (err) {
> +             dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
> +             goto err_affinity_hints;
> +     }
> +
>       err = mlx5_init_fs(dev);
>       if (err) {
>               dev_err(&pdev->dev, "Failed to init flow steering\n");
> @@ -1154,6 +1213,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, 
> struct mlx5_priv *priv,
>       mlx5_cleanup_fs(dev);
>  
>  err_fs:
> +     mlx5_irq_clear_affinity_hints(dev);
> +
> +err_affinity_hints:
>       free_comp_eqs(dev);
>  
>  err_stop_eqs:
> @@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, 
> struct mlx5_priv *priv,
>  
>       mlx5_sriov_detach(dev);
>       mlx5_cleanup_fs(dev);
> +     mlx5_irq_clear_affinity_hints(dev);
>       free_comp_eqs(dev);
>       mlx5_stop_eqs(dev);
>       mlx5_put_uars_page(dev, priv->uar);
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index a886b51511ab..40a6f33c4cde 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -556,6 +556,7 @@ struct mlx5_core_sriov {
>  };
>  
>  struct mlx5_irq_info {
> +     cpumask_var_t mask;
>       char name[MLX5_MAX_IRQ_NAME];
>  };
>  
> 

Reply via email to