On 12/19/2017 05:24 PM, Saeed Mahameed wrote: > Before the offending commit, mlx5 core did the IRQ affinity itself, > and it seems that the new generic code have some drawbacks and one > of them is the lack for user ability to modify irq affinity after > the initial affinity values got assigned. > > The issue is still being discussed and a solution in the new generic code > is required, until then we need to revert this patch. > > This fixes the following issue: > echo <new affinity> > /proc/irq/<x>/smp_affinity > fails with -EIO > > This reverts commit a435393acafbf0ecff4deb3e3cb554b34f0d0664. > Note: kept mlx5_get_vector_affinity in include/linux/mlx5/driver.h since > it is used in mlx5_ib driver. > > Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code") > Cc: Sagi Grimberg <s...@grimberg.me> > Cc: Thomas Gleixner <t...@linutronix.de> > Cc: Jes Sorensen <jsoren...@fb.com> > Reported-by: Jes Sorensen <jsoren...@fb.com> > Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
Acked-by: Jes Sorensen <jsoren...@fb.com> Cheers, Jes > --- > drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + > drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 45 +++++++------- > drivers/net/ethernet/mellanox/mlx5/core/main.c | 75 > +++++++++++++++++++++-- > include/linux/mlx5/driver.h | 1 + > 4 files changed, 93 insertions(+), 29 deletions(-) > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h > b/drivers/net/ethernet/mellanox/mlx5/core/en.h > index c0872b3284cb..43f9054830e5 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h > @@ -590,6 +590,7 @@ struct mlx5e_channel { > struct mlx5_core_dev *mdev; > struct hwtstamp_config *tstamp; > int ix; > + int cpu; > }; > > struct mlx5e_channels { > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c > b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c > index d2b057a3e512..cbec66bc82f1 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c > @@ -71,11 +71,6 @@ struct mlx5e_channel_param { > struct mlx5e_cq_param icosq_cq; > }; > > -static int mlx5e_get_node(struct mlx5e_priv *priv, int ix) > -{ > - return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix); > -} > - > static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev > *mdev) > { > return MLX5_CAP_GEN(mdev, striding_rq) && > @@ -444,17 +439,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq > *rq, > int wq_sz = mlx5_wq_ll_get_size(&rq->wq); > int mtt_sz = mlx5e_get_wqe_mtt_sz(); > int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; > - int node = mlx5e_get_node(c->priv, c->ix); > int i; > > rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), > - GFP_KERNEL, node); > + GFP_KERNEL, cpu_to_node(c->cpu)); > if (!rq->mpwqe.info) > goto err_out; > > /* We allocate more than mtt_sz as we will align the pointer */ > - rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, > - GFP_KERNEL, node); > + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, > + cpu_to_node(c->cpu)); > if (unlikely(!rq->mpwqe.mtt_no_align)) > goto err_free_wqe_info; > > @@ -562,7 +556,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, > int err; > int i; > > - rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); > + rqp->wq.db_numa_node = cpu_to_node(c->cpu); > > err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, > &rq->wq_ctrl); > @@ -629,8 +623,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, > default: /* MLX5_WQ_TYPE_LINKED_LIST */ > rq->wqe.frag_info = > kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), > - GFP_KERNEL, > - mlx5e_get_node(c->priv, c->ix)); > + GFP_KERNEL, cpu_to_node(c->cpu)); > if (!rq->wqe.frag_info) { > err = -ENOMEM; > goto err_rq_wq_destroy; > @@ -1000,13 +993,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, > sq->uar_map = mdev->mlx5e_res.bfreg.map; > sq->min_inline_mode = params->tx_min_inline_mode; > > - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); > + param->wq.db_numa_node = cpu_to_node(c->cpu); > err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, > &sq->wq_ctrl); > if (err) > return err; > sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; > > - err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix)); > + err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); > if (err) > goto err_sq_wq_destroy; > > @@ -1053,13 +1046,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, > sq->channel = c; > sq->uar_map = mdev->mlx5e_res.bfreg.map; > > - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); > + param->wq.db_numa_node = cpu_to_node(c->cpu); > err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, > &sq->wq_ctrl); > if (err) > return err; > sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; > > - err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix)); > + err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); > if (err) > goto err_sq_wq_destroy; > > @@ -1126,13 +1119,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, > if (MLX5_IPSEC_DEV(c->priv->mdev)) > set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); > > - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); > + param->wq.db_numa_node = cpu_to_node(c->cpu); > err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, > &sq->wq_ctrl); > if (err) > return err; > sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; > > - err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix)); > + err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); > if (err) > goto err_sq_wq_destroy; > > @@ -1504,8 +1497,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c, > struct mlx5_core_dev *mdev = c->priv->mdev; > int err; > > - param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix); > - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); > + param->wq.buf_numa_node = cpu_to_node(c->cpu); > + param->wq.db_numa_node = cpu_to_node(c->cpu); > param->eq_ix = c->ix; > > err = mlx5e_alloc_cq_common(mdev, param, cq); > @@ -1604,6 +1597,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) > mlx5e_free_cq(cq); > } > > +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) > +{ > + return cpumask_first(priv->mdev->priv.irq_info[ix].mask); > +} > + > static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, > struct mlx5e_params *params, > struct mlx5e_channel_param *cparam) > @@ -1752,12 +1750,13 @@ static int mlx5e_open_channel(struct mlx5e_priv > *priv, int ix, > { > struct mlx5e_cq_moder icocq_moder = {0, 0}; > struct net_device *netdev = priv->netdev; > + int cpu = mlx5e_get_cpu(priv, ix); > struct mlx5e_channel *c; > unsigned int irq; > int err; > int eqn; > > - c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); > + c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); > if (!c) > return -ENOMEM; > > @@ -1765,6 +1764,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, > int ix, > c->mdev = priv->mdev; > c->tstamp = &priv->tstamp; > c->ix = ix; > + c->cpu = cpu; > c->pdev = &priv->mdev->pdev->dev; > c->netdev = priv->netdev; > c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); > @@ -1853,8 +1853,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel > *c) > for (tc = 0; tc < c->num_tc; tc++) > mlx5e_activate_txqsq(&c->sq[tc]); > mlx5e_activate_rq(&c->rq); > - netif_set_xps_queue(c->netdev, > - mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix); > + netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); > } > > static void mlx5e_deactivate_channel(struct mlx5e_channel *c) > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c > b/drivers/net/ethernet/mellanox/mlx5/core/main.c > index 5f323442cc5a..8a89c7e8cd63 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c > @@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev > *dev) > { > struct mlx5_priv *priv = &dev->priv; > struct mlx5_eq_table *table = &priv->eq_table; > - struct irq_affinity irqdesc = { > - .pre_vectors = MLX5_EQ_VEC_COMP_BASE, > - }; > int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); > int nvec; > > @@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev > *dev) > if (!priv->irq_info) > goto err_free_msix; > > - nvec = pci_alloc_irq_vectors_affinity(dev->pdev, > + nvec = pci_alloc_irq_vectors(dev->pdev, > MLX5_EQ_VEC_COMP_BASE + 1, nvec, > - PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, > - &irqdesc); > + PCI_IRQ_MSIX); > if (nvec < 0) > return nvec; > > @@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) > return (u64)timer_l | (u64)timer_h1 << 32; > } > > +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) > +{ > + struct mlx5_priv *priv = &mdev->priv; > + int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); > + > + if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { > + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); > + return -ENOMEM; > + } > + > + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), > + priv->irq_info[i].mask); > + > + if (IS_ENABLED(CONFIG_SMP) && > + irq_set_affinity_hint(irq, priv->irq_info[i].mask)) > + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq > 0x%.4x", irq); > + > + return 0; > +} > + > +static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) > +{ > + struct mlx5_priv *priv = &mdev->priv; > + int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); > + > + irq_set_affinity_hint(irq, NULL); > + free_cpumask_var(priv->irq_info[i].mask); > +} > + > +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) > +{ > + int err; > + int i; > + > + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { > + err = mlx5_irq_set_affinity_hint(mdev, i); > + if (err) > + goto err_out; > + } > + > + return 0; > + > +err_out: > + for (i--; i >= 0; i--) > + mlx5_irq_clear_affinity_hint(mdev, i); > + > + return err; > +} > + > +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) > +{ > + int i; > + > + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) > + mlx5_irq_clear_affinity_hint(mdev, i); > +} > + > int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, > unsigned int *irqn) > { > @@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, > struct mlx5_priv *priv, > goto err_stop_eqs; > } > > + err = mlx5_irq_set_affinity_hints(dev); > + if (err) { > + dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); > + goto err_affinity_hints; > + } > + > err = mlx5_init_fs(dev); > if (err) { > dev_err(&pdev->dev, "Failed to init flow steering\n"); > @@ -1154,6 +1213,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, > struct mlx5_priv *priv, > mlx5_cleanup_fs(dev); > > err_fs: > + mlx5_irq_clear_affinity_hints(dev); > + > +err_affinity_hints: > free_comp_eqs(dev); > > err_stop_eqs: > @@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, > struct mlx5_priv *priv, > > mlx5_sriov_detach(dev); > mlx5_cleanup_fs(dev); > + mlx5_irq_clear_affinity_hints(dev); > free_comp_eqs(dev); > mlx5_stop_eqs(dev); > mlx5_put_uars_page(dev, priv->uar); > diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h > index a886b51511ab..40a6f33c4cde 100644 > --- a/include/linux/mlx5/driver.h > +++ b/include/linux/mlx5/driver.h > @@ -556,6 +556,7 @@ struct mlx5_core_sriov { > }; > > struct mlx5_irq_info { > + cpumask_var_t mask; > char name[MLX5_MAX_IRQ_NAME]; > }; > >