Hi Michael,

> -----Original Message-----
> From: Michael Baum <michae...@nvidia.com>
> Sent: Monday, July 12, 2021 10:07 AM
>
> The issue can cause illegal physical address access while a huge-page A is
> released and huge-page B is allocated on the same virtual address.
> The old MR can be matched using the virtual address of huge-page B but the
> HW will access the physical address of huge-page A which is no more part of
> the DPDK process.
>
> Register a driver callback for memory event in order to free out all the
> MRs of memory that is going to be freed from the dpdk process.
>
> Fixes: cda883bbb655 ("regex/mlx5: add dynamic memory registration to datapath")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Michael Baum <michae...@nvidia.com>
> ---
>  drivers/regex/mlx5/mlx5_regex.c          | 55 ++++++++++++++++++++++++
>  drivers/regex/mlx5/mlx5_regex.h          |  2 +
>  drivers/regex/mlx5/mlx5_regex_control.c  |  2 +
>  drivers/regex/mlx5/mlx5_regex_fastpath.c | 50 +++++++++++++++------
>  4 files changed, 97 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/regex/mlx5/mlx5_regex.c b/drivers/regex/mlx5/mlx5_regex.c
> index dcb2ced88e..0f12d94d7e 100644
> --- a/drivers/regex/mlx5/mlx5_regex.c
> +++ b/drivers/regex/mlx5/mlx5_regex.c
> @@ -11,6 +11,7 @@
>  #include <rte_regexdev_driver.h>
>
>  #include <mlx5_common_pci.h>
> +#include <mlx5_common_mr.h>
>  #include <mlx5_common.h>
>  #include <mlx5_glue.h>
>  #include <mlx5_devx_cmds.h>
> @@ -24,6 +25,10 @@
>
>  int mlx5_regex_logtype;
>
> +TAILQ_HEAD(regex_mem_event, mlx5_regex_priv) mlx5_mem_event_list =
> +				TAILQ_HEAD_INITIALIZER(mlx5_mem_event_list);
> +static pthread_mutex_t mem_event_list_lock = PTHREAD_MUTEX_INITIALIZER;
> +
>  const struct rte_regexdev_ops mlx5_regexdev_ops = {
>  	.dev_info_get = mlx5_regex_info_get,
>  	.dev_configure = mlx5_regex_configure,
> @@ -82,6 +87,40 @@ mlx5_regex_get_name(char *name, struct rte_pci_device *pci_dev __rte_unused)
>  		 pci_dev->addr.devid, pci_dev->addr.function);
>  }
>
> +/**
> + * Callback for memory event.
> + *
> + * @param event_type
> + *   Memory event type.
> + * @param addr
> + *   Address of memory.
> + * @param len
> + *   Size of memory.
> + */
> +static void
> +mlx5_regex_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
> +			   size_t len, void *arg __rte_unused)
> +{
> +	struct mlx5_regex_priv *priv;
> +
> +	/* Must be called from the primary process. */
> +	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +	switch (event_type) {
> +	case RTE_MEM_EVENT_FREE:
> +		pthread_mutex_lock(&mem_event_list_lock);
> +		/* Iterate all the existing mlx5 devices. */
> +		TAILQ_FOREACH(priv, &mlx5_mem_event_list, mem_event_cb)
> +			mlx5_free_mr_by_addr(&priv->mr_scache,
> +					     priv->ctx->device->name,
> +					     addr, len);
> +		pthread_mutex_unlock(&mem_event_list_lock);
> +		break;
> +	case RTE_MEM_EVENT_ALLOC:
> +	default:
> +		break;
> +	}
> +}
> +
>  static int
>  mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>  		     struct rte_pci_device *pci_dev)
> @@ -193,6 +232,15 @@ mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>  		rte_errno = ENOMEM;
>  		goto error;
>  	}
> +	/* Register callback function for global shared MR cache management. */
> +	if (TAILQ_EMPTY(&mlx5_mem_event_list))
> +		rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
> +						mlx5_regex_mr_mem_event_cb,
> +						NULL);
> +	/* Add device to memory callback list. */
> +	pthread_mutex_lock(&mem_event_list_lock);
> +	TAILQ_INSERT_TAIL(&mlx5_mem_event_list, priv, mem_event_cb);
> +	pthread_mutex_unlock(&mem_event_list_lock);
>  	DRV_LOG(INFO, "RegEx GGA is %s.",
>  		priv->has_umr ? "supported" : "unsupported");
>  	return 0;
> @@ -225,6 +273,13 @@ mlx5_regex_pci_remove(struct rte_pci_device *pci_dev)
>  		return 0;
>  	priv = dev->data->dev_private;
>  	if (priv) {
> +		/* Remove from memory callback device list. */
> +		pthread_mutex_lock(&mem_event_list_lock);
> +		TAILQ_REMOVE(&mlx5_mem_event_list, priv, mem_event_cb);
> +		pthread_mutex_unlock(&mem_event_list_lock);
> +		if (TAILQ_EMPTY(&mlx5_mem_event_list))
> +			rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
> +							  NULL);
>  		if (priv->pd)
>  			mlx5_glue->dealloc_pd(priv->pd);
>  		if (priv->uar)
> diff --git a/drivers/regex/mlx5/mlx5_regex.h b/drivers/regex/mlx5/mlx5_regex.h
> index 51a2101e53..61f59ba873 100644
> --- a/drivers/regex/mlx5/mlx5_regex.h
> +++ b/drivers/regex/mlx5/mlx5_regex.h
> @@ -70,6 +70,8 @@ struct mlx5_regex_priv {
>  	uint32_t nb_engines; /* Number of RegEx engines. */
>  	struct mlx5dv_devx_uar *uar; /* UAR object. */
>  	struct ibv_pd *pd;
> +	TAILQ_ENTRY(mlx5_regex_priv) mem_event_cb;
> +	/**< Called by memory event callback. */
>  	struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */
>  	uint8_t is_bf2; /* The device is BF2 device. */
>  	uint8_t sq_ts_format; /* Whether SQ supports timestamp formats. */
> diff --git a/drivers/regex/mlx5/mlx5_regex_control.c b/drivers/regex/mlx5/mlx5_regex_control.c
> index eef0fe579d..8ce2dabb55 100644
> --- a/drivers/regex/mlx5/mlx5_regex_control.c
> +++ b/drivers/regex/mlx5/mlx5_regex_control.c
> @@ -246,6 +246,8 @@ mlx5_regex_qp_setup(struct rte_regexdev *dev, uint16_t qp_ind,
>  		nb_sq_config++;
>  	}
>
> +	/* Save pointer of global generation number to check memory event. */
> +	qp->mr_ctrl.dev_gen_ptr = &priv->mr_scache.dev_gen;
>  	ret = mlx5_mr_btree_init(&qp->mr_ctrl.cache_bh, MLX5_MR_BTREE_CACHE_N,
>  				 rte_socket_id());
>  	if (ret) {
> diff --git a/drivers/regex/mlx5/mlx5_regex_fastpath.c b/drivers/regex/mlx5/mlx5_regex_fastpath.c
> index b57e7d7794..6d5096701f 100644
> --- a/drivers/regex/mlx5/mlx5_regex_fastpath.c
> +++ b/drivers/regex/mlx5/mlx5_regex_fastpath.c
> @@ -109,6 +109,40 @@ set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, uint8_t opcode,
>  	seg->imm = imm;
>  }
>
> +/**
> + * Query LKey from a packet buffer for QP. If not found, add the mempool.
> + *
> + * @param priv
> + *   Pointer to the priv object.
> + * @param mr_ctrl
> + *   Pointer to per-queue MR control structure.
> + * @param mbuf
> + *   Pointer to source mbuf, to search in.
> + *
> + * @return
> + *   Searched LKey on success, UINT32_MAX on no match.
> + */
> +static inline uint32_t
> +mlx5_regex_addr2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl *mr_ctrl,
> +		   struct rte_mbuf *mbuf)
> +{
> +	uintptr_t addr = rte_pktmbuf_mtod(mbuf, uintptr_t);
> +	uint32_t lkey;
> +
> +	/* Check generation bit to see if there's any change on existing MRs. */
> +	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
> +		mlx5_mr_flush_local_cache(mr_ctrl);
> +	/* Linear search on MR cache array. */
> +	lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
> +				   MLX5_MR_CACHE_N, addr);
> +	if (likely(lkey != UINT32_MAX))
> +		return lkey;
> +	/* Take slower bottom-half on miss. */
> +	return mlx5_mr_addr2mr_bh(priv->pd, 0, &priv->mr_scache, mr_ctrl, addr,
> +				  !!(mbuf->ol_flags & EXT_ATTACHED_MBUF));
> +}
> +
> +
>  static inline void
>  __prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_sq *sq,
>  	   struct rte_regex_ops *op, struct mlx5_regex_job *job,
> @@ -160,10 +194,7 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
>  	struct mlx5_klm klm;
>
>  	klm.byte_count = rte_pktmbuf_data_len(op->mbuf);
> -	klm.mkey = mlx5_mr_addr2mr_bh(priv->pd, 0,
> -				      &priv->mr_scache, &qp->mr_ctrl,
> -				      rte_pktmbuf_mtod(op->mbuf, uintptr_t),
> -				      !!(op->mbuf->ol_flags & EXT_ATTACHED_MBUF));
> +	klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, op->mbuf);
>  	klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
>  	__prep_one(priv, sq, op, job, sq->pi, &klm);
>  	sq->db_pi = sq->pi;
> @@ -329,10 +360,8 @@ prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
>  					(qp->jobs[mkey_job_id].imkey->id);
>  			while (mbuf) {
>  				/* Build indirect mkey seg's KLM. */
> -				mkey_klm->mkey = mlx5_mr_addr2mr_bh(priv->pd,
> -					NULL, &priv->mr_scache, &qp->mr_ctrl,
> -					rte_pktmbuf_mtod(mbuf, uintptr_t),
> -					!!(mbuf->ol_flags & EXT_ATTACHED_MBUF));
> +				mkey_klm->mkey = mlx5_regex_addr2mr
> +						(priv, &qp->mr_ctrl, mbuf);
>  				mkey_klm->address = rte_cpu_to_be_64
>  					(rte_pktmbuf_mtod(mbuf, uintptr_t));
>  				mkey_klm->byte_count = rte_cpu_to_be_32
> @@ -350,10 +379,7 @@ prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
>  			klm.byte_count = scatter_size;
>  		} else {
>  			/* The single mubf case. Build the KLM directly. */
> -			klm.mkey = mlx5_mr_addr2mr_bh(priv->pd, NULL,
> -					&priv->mr_scache, &qp->mr_ctrl,
> -					rte_pktmbuf_mtod(mbuf, uintptr_t),
> -					!!(mbuf->ol_flags & EXT_ATTACHED_MBUF));
> +			klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, mbuf);
>  			klm.address = rte_pktmbuf_mtod(mbuf, uintptr_t);
>  			klm.byte_count = rte_pktmbuf_data_len(mbuf);
>  		}
> --
> 2.25.1
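The mechanism itself is the standard EAL memory-event hook: register a single callback for the driver, keep the probed devices on a list, and on RTE_MEM_EVENT_FREE invalidate the MRs overlapping the freed range so the next datapath lookup misses and re-registers. For readers less familiar with that API, a stripped-down sketch of the pattern follows; the dummy_* structure and function names are placeholders of mine (not mlx5 definitions), and only the rte_mem_event_callback_*() and TAILQ usage mirror the patch:

/* Minimal sketch of the memory-event callback pattern (placeholder names). */
#include <pthread.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_memory.h>

struct dummy_priv {
	TAILQ_ENTRY(dummy_priv) mem_event_cb; /* Entry in the device list. */
	/* The per-device MR cache would live here. */
};

TAILQ_HEAD(dummy_dev_list, dummy_priv);
static struct dummy_dev_list dev_list = TAILQ_HEAD_INITIALIZER(dev_list);
static pthread_mutex_t dev_list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Called by EAL whenever hugepage memory is allocated or freed. */
static void
dummy_mem_event_cb(enum rte_mem_event event_type, const void *addr,
		   size_t len, void *arg __rte_unused)
{
	struct dummy_priv *priv;

	if (event_type != RTE_MEM_EVENT_FREE)
		return;
	pthread_mutex_lock(&dev_list_lock);
	TAILQ_FOREACH(priv, &dev_list, mem_event_cb) {
		/* Invalidate MRs overlapping [addr, addr + len) here. */
		RTE_SET_USED(addr);
		RTE_SET_USED(len);
	}
	pthread_mutex_unlock(&dev_list_lock);
}

/* Probe: register the callback once, then add the device to the list. */
static void
dummy_probe(struct dummy_priv *priv)
{
	if (TAILQ_EMPTY(&dev_list))
		rte_mem_event_callback_register("DUMMY_MEM_EVENT_CB",
						dummy_mem_event_cb, NULL);
	pthread_mutex_lock(&dev_list_lock);
	TAILQ_INSERT_TAIL(&dev_list, priv, mem_event_cb);
	pthread_mutex_unlock(&dev_list_lock);
}

/* Remove: drop the device, unregister when the list becomes empty. */
static void
dummy_remove(struct dummy_priv *priv)
{
	pthread_mutex_lock(&dev_list_lock);
	TAILQ_REMOVE(&dev_list, priv, mem_event_cb);
	pthread_mutex_unlock(&dev_list_lock);
	if (TAILQ_EMPTY(&dev_list))
		rte_mem_event_callback_unregister("DUMMY_MEM_EVENT_CB", NULL);
}

Registering only when the first device is added and unregistering when the last one is removed ties the EAL callback slot to the driver's lifetime rather than to each probed device, which is exactly what the probe/remove hunks above do.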
Acked-by: Ori Kam <or...@nvidia.com>

Thanks,
Ori