Memory regions become shared resources: the control plane now holds its
own reference in addition to those taken by the Tx/Rx queues.
This also avoids un-registering a memory region that is still in use
when a Tx queue evicts it from its local cache.
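
To illustrate the scheme, below is a minimal, self-contained sketch of
the get/new/release pattern. The "region" helpers, C11 atomics and
malloc()/free() are illustrative stand-ins for mlx5_mr, rte_atomic32_t,
rte_zmalloc_socket() and ibv_reg_mr()/ibv_dereg_mr(); this is not the
driver code itself:

    #include <errno.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    struct region {
            LIST_ENTRY(region) next; /* Linked into the owner's list. */
            atomic_int refcnt;       /* Shared-ownership counter. */
            const void *pool;        /* Lookup key: the memory pool. */
    };

    LIST_HEAD(region_list, region);

    /* Create a region, link it and take the first (control plane) reference. */
    static struct region *
    region_new(struct region_list *list, const void *pool)
    {
            struct region *r = calloc(1, sizeof(*r));

            if (r == NULL)
                    return NULL;
            r->pool = pool;
            atomic_init(&r->refcnt, 1);
            LIST_INSERT_HEAD(list, r, next);
            return r;
    }

    /* Look up an existing region covering the pool and take a reference. */
    static struct region *
    region_get(struct region_list *list, const void *pool)
    {
            struct region *r;

            LIST_FOREACH(r, list, next) {
                    if (r->pool == pool) {
                            atomic_fetch_add(&r->refcnt, 1);
                            return r;
                    }
            }
            return NULL;
    }

    /* Drop one reference; tear the region down only when the last one goes. */
    static int
    region_release(struct region *r)
    {
            if (atomic_fetch_sub(&r->refcnt, 1) != 1)
                    return EBUSY; /* Other owners remain, keep it registered. */
            LIST_REMOVE(r, next);
            free(r); /* The real code de-registers the MR here. */
            return 0;
    }

    int
    main(void)
    {
            struct region_list list = LIST_HEAD_INITIALIZER(list);
            const char pool[] = "mbuf_pool";
            struct region *ctrl = region_new(&list, pool); /* Control plane ref. */
            struct region *txq = region_get(&list, pool);  /* Tx queue cache ref. */

            if (ctrl == NULL || txq == NULL)
                    return 1;
            printf("shared instance: %s\n", ctrl == txq ? "yes" : "no");
            region_release(txq);  /* Cache eviction: region stays registered. */
            region_release(ctrl); /* Last reference: region is actually freed. */
            return 0;
    }

In the driver, mlx5_txq_mp2mr_reg() and mlx5_rxq_ctrl_setup() follow the
same pattern: try priv_mr_get() first and fall back to priv_mr_new(),
while Tx cache eviction and queue cleanup go through priv_mr_release().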

Signed-off-by: Nelio Laranjeiro <nelio.laranje...@6wind.com>
---
 drivers/net/mlx5/mlx5.h      |   8 ++
 drivers/net/mlx5/mlx5_mr.c   | 202 ++++++++++++++++++++++++++++++-------------
 drivers/net/mlx5/mlx5_rxq.c  |  17 ++--
 drivers/net/mlx5/mlx5_rxtx.h |  42 +++++----
 drivers/net/mlx5/mlx5_txq.c  |   8 +-
 5 files changed, 186 insertions(+), 91 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a5e9aa1..1ae5f59 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -145,6 +145,7 @@ struct priv {
        unsigned int reta_idx_n; /* RETA index size. */
        struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
        TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
+       LIST_HEAD(mr, mlx5_mr) mr; /* Memory region list. */
        uint32_t link_speed_capa; /* Link speed capabilities. */
        struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
        rte_spinlock_t lock; /* Lock for control functions. */
@@ -289,4 +290,11 @@ void priv_flow_stop(struct priv *);
 int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
 int priv_flow_verify(struct priv *);
 
+/* mlx5_mr.c */
+
+struct mlx5_mr *priv_mr_new(struct priv *, struct rte_mempool *);
+struct mlx5_mr *priv_mr_get(struct priv *, struct rte_mempool *);
+int priv_mr_release(struct priv *, struct mlx5_mr *);
+int priv_mr_verify(struct priv *);
+
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index e8adde5..b5e9500 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -42,6 +42,7 @@
 #endif
 
 #include <rte_mempool.h>
+#include <rte_malloc.h>
 
 #include "mlx5.h"
 #include "mlx5_rxtx.h"
@@ -111,54 +112,6 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
 }
 
 /**
- * Register mempool as a memory region.
- *
- * @param pd
- *   Pointer to protection domain.
- * @param mp
- *   Pointer to memory pool.
- *
- * @return
- *   Memory region pointer, NULL in case of error.
- */
-struct ibv_mr *
-mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
-{
-       const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-       uintptr_t start;
-       uintptr_t end;
-       unsigned int i;
-
-       if (mlx5_check_mempool(mp, &start, &end) != 0) {
-               ERROR("mempool %p: not virtually contiguous",
-                     (void *)mp);
-               return NULL;
-       }
-
-       DEBUG("mempool %p area start=%p end=%p size=%zu",
-             (void *)mp, (void *)start, (void *)end,
-             (size_t)(end - start));
-       /* Round start and end to page boundary if found in memory segments. */
-       for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
-               uintptr_t addr = (uintptr_t)ms[i].addr;
-               size_t len = ms[i].len;
-               unsigned int align = ms[i].hugepage_sz;
-
-               if ((start > addr) && (start < addr + len))
-                       start = RTE_ALIGN_FLOOR(start, align);
-               if ((end > addr) && (end < addr + len))
-                       end = RTE_ALIGN_CEIL(end, align);
-       }
-       DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
-             (void *)mp, (void *)start, (void *)end,
-             (size_t)(end - start));
-       return ibv_reg_mr(pd,
-                         (void *)start,
-                         end - start,
-                         IBV_ACCESS_LOCAL_WRITE);
-}
-
-/**
  * Register a Memory Region (MR) <-> Memory Pool (MP) association in
  * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
  *
@@ -180,12 +133,14 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 {
        struct mlx5_txq_ctrl *txq_ctrl =
                container_of(txq, struct mlx5_txq_ctrl, txq);
-       struct ibv_mr *mr;
+       struct mlx5_mr *mr;
 
        /* Add a new entry, register MR first. */
        DEBUG("%p: discovered new memory pool \"%s\" (%p)",
              (void *)txq_ctrl, mp->name, (void *)mp);
-       mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp);
+       mr = priv_mr_get(txq_ctrl->priv, mp);
+       if (mr == NULL)
+               mr = priv_mr_new(txq_ctrl->priv, mp);
        if (unlikely(mr == NULL)) {
                DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
                      (void *)txq_ctrl);
@@ -196,20 +151,17 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
                DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
                      (void *)txq_ctrl);
                --idx;
-               claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[0].mr));
+               priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[0]);
                memmove(&txq_ctrl->txq.mp2mr[0], &txq_ctrl->txq.mp2mr[1],
                        (sizeof(txq_ctrl->txq.mp2mr) -
                         sizeof(txq_ctrl->txq.mp2mr[0])));
        }
        /* Store the new entry. */
-       txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr;
-       txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length;
-       txq_ctrl->txq.mp2mr[idx].mr = mr;
-       txq_ctrl->txq.mp2mr[idx].lkey = htonl(mr->lkey);
+       txq_ctrl->txq.mp2mr[idx] = mr;
        DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
              (void *)txq_ctrl, mp->name, (void *)mp,
-             txq_ctrl->txq.mp2mr[idx].lkey);
-       return txq_ctrl->txq.mp2mr[idx].lkey;
+             txq_ctrl->txq.mp2mr[idx]->lkey);
+       return txq_ctrl->txq.mp2mr[idx]->lkey;
 }
 
 struct txq_mp2mr_mbuf_check_data {
@@ -275,15 +227,141 @@ mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
                return;
        }
        for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-               struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
-
-               if (unlikely(mr == NULL)) {
+               if (unlikely(txq_ctrl->txq.mp2mr[i] == NULL)) {
                        /* Unknown MP, add a new MR for it. */
                        break;
                }
-               if (start >= (uintptr_t)mr->addr &&
-                   end <= (uintptr_t)mr->addr + mr->length)
+               if (start >= (uintptr_t)txq_ctrl->txq.mp2mr[i]->start &&
+                   end <= (uintptr_t)txq_ctrl->txq.mp2mr[i]->end)
                        return;
        }
        mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
 }
+
+/**
+ * Register a new memory region from the mempool and store it in the memory
+ * region list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool to register.
+ * @return
+ *   The memory region on success, NULL on failure.
+ */
+struct mlx5_mr *
+priv_mr_new(struct priv *priv, struct rte_mempool *mp)
+{
+       const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+       uintptr_t start;
+       uintptr_t end;
+       unsigned int i;
+       struct mlx5_mr *mr;
+
+       if (mlx5_check_mempool(mp, &start, &end) != 0) {
+               ERROR("mempool %p: not virtually contiguous",
+                     (void *)mp);
+               return NULL;
+       }
+       mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
+       if (!mr) {
+               DEBUG("unable to allocate MR, rte_zmalloc_socket() failed.");
+               return NULL;
+       }
+       DEBUG("mempool %p area start=%p end=%p size=%zu",
+             (void *)mp, (void *)start, (void *)end,
+             (size_t)(end - start));
+       /* Round start and end to page boundary if found in memory segments. */
+       for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+               uintptr_t addr = (uintptr_t)ms[i].addr;
+               size_t len = ms[i].len;
+               unsigned int align = ms[i].hugepage_sz;
+
+               if ((start > addr) && (start < addr + len))
+                       start = RTE_ALIGN_FLOOR(start, align);
+               if ((end > addr) && (end < addr + len))
+                       end = RTE_ALIGN_CEIL(end, align);
+       }
+       DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+             (void *)mp, (void *)start, (void *)end,
+             (size_t)(end - start));
+       mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
+                           IBV_ACCESS_LOCAL_WRITE);
+       mr->mp = mp;
+       mr->lkey = htonl(mr->mr->lkey);
+       mr->start = start;
+       mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
+       rte_atomic32_inc(&mr->refcnt);
+       LIST_INSERT_HEAD(&priv->mr, mr, next);
+       return mr;
+}
+
+/**
+ * Search the memory region list for an entry matching the memory pool.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool to look up.
+ * @return
+ *   The memory region if found, NULL otherwise.
+ */
+struct mlx5_mr *
+priv_mr_get(struct priv *priv, struct rte_mempool *mp)
+{
+       struct mlx5_mr *mr;
+
+       if (LIST_EMPTY(&priv->mr))
+               return NULL;
+       LIST_FOREACH(mr, &priv->mr, next) {
+               if (mr->mp == mp) {
+                       rte_atomic32_inc(&mr->refcnt);
+                       return mr;
+               }
+       }
+       return NULL;
+}
+
+/**
+ * Release the memory region object.
+ *
+ * @param mr
+ *   Pointer to memory region to release.
+ *
+ * @return
+ *   0 on success, EBUSY if the memory region is still referenced.
+ */
+int
+priv_mr_release(struct priv *priv, struct mlx5_mr *mr)
+{
+       (void)priv;
+       if (rte_atomic32_dec_and_test(&mr->refcnt)) {
+               claim_zero(ibv_dereg_mr(mr->mr));
+               LIST_REMOVE(mr, next);
+               rte_free(mr);
+               return 0;
+       }
+       return EBUSY;
+}
+
+/**
+ * Verify the memory region list is empty.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return The number of objects not released.
+ */
+int
+priv_mr_verify(struct priv *priv)
+{
+       int ret = 0;
+       struct mlx5_mr *mr;
+
+       LIST_FOREACH(mr, &priv->mr, next) {
+               DEBUG("%p: mr %p still referenced", (void *)priv,
+                     (void *)mr);
+               ++ret;
+       }
+       return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 88a024c..80cfd96 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -674,7 +674,7 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
                *scat = (struct mlx5_wqe_data_seg){
                        .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
                        .byte_count = htonl(DATA_LEN(buf)),
-                       .lkey = htonl(rxq_ctrl->mr->lkey),
+                       .lkey = rxq_ctrl->mr->lkey,
                };
                (*rxq_ctrl->rxq.elts)[i] = buf;
        }
@@ -768,7 +768,7 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
        if (rxq_ctrl->channel != NULL)
                claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
        if (rxq_ctrl->mr != NULL)
-               claim_zero(ibv_dereg_mr(rxq_ctrl->mr));
+               priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr);
        memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
 }
 
@@ -920,12 +920,15 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
                tmpl.rxq.csum_l2tun =
                        !!dev->data->dev_conf.rxmode.hw_ip_checksum;
        /* Use the entire RX mempool as the memory region. */
-       tmpl.mr = mlx5_mp2mr(priv->pd, mp);
+       tmpl.mr = priv_mr_get(priv, mp);
        if (tmpl.mr == NULL) {
-               ret = EINVAL;
-               ERROR("%p: MR creation failure: %s",
-                     (void *)dev, strerror(ret));
-               goto error;
+               tmpl.mr = priv_mr_new(priv, mp);
+               if (tmpl.mr == NULL) {
+                       ret = EINVAL;
+                       ERROR("%p: MR creation failure: %s",
+                             (void *)dev, strerror(ret));
+                       goto error;
+               }
        }
        if (dev->data->dev_conf.intr_conf.rxq) {
                tmpl.channel = ibv_create_comp_channel(priv->ctx);
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 12366c5..c7c7518 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -36,6 +36,7 @@
 
 #include <stddef.h>
 #include <stdint.h>
+#include <sys/queue.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -51,6 +52,7 @@
 #include <rte_mbuf.h>
 #include <rte_mempool.h>
 #include <rte_common.h>
+#include <rte_atomic.h>
 
 #include "mlx5_utils.h"
 #include "mlx5.h"
@@ -79,6 +81,17 @@ struct mlx5_txq_stats {
 
 struct priv;
 
+/* Memory region queue object. */
+struct mlx5_mr {
+       LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */
+       rte_atomic32_t refcnt; /**< Reference counter. */
+       uint32_t lkey; /**< htonl(mr->lkey). */
+       uintptr_t start; /**< Start address of MR. */
+       uintptr_t end; /**< End address of MR. */
+       struct ibv_mr *mr; /**< Memory Region. */
+       struct rte_mempool *mp; /**< Memory Pool. */
+};
+
 /* Compressed CQE context. */
 struct rxq_zip {
        uint16_t ai; /* Array index. */
@@ -122,7 +135,7 @@ struct mlx5_rxq_ctrl {
        struct priv *priv; /* Back pointer to private data. */
        struct ibv_cq *cq; /* Completion Queue. */
        struct ibv_exp_wq *wq; /* Work Queue. */
-       struct ibv_mr *mr; /* Memory Region (for mp). */
+       struct mlx5_mr *mr; /* Memory Region (for mp). */
        struct ibv_comp_channel *channel;
        unsigned int socket; /* CPU socket ID for allocations. */
        struct mlx5_rxq_data rxq; /* Data path structure. */
@@ -248,6 +261,7 @@ struct mlx5_txq_data {
        uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
        uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
        uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
+       uint16_t mr_cache_idx; /* Index of last hit entry. */
        uint32_t qp_num_8s; /* QP number shifted by 8. */
        uint32_t flags; /* Flags for Tx Queue. */
        volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
@@ -255,13 +269,7 @@ struct mlx5_txq_data {
        volatile uint32_t *qp_db; /* Work queue doorbell. */
        volatile uint32_t *cq_db; /* Completion queue doorbell. */
        volatile void *bf_reg; /* Blueflame register. */
-       struct {
-               uintptr_t start; /* Start address of MR */
-               uintptr_t end; /* End address of MR */
-               struct ibv_mr *mr; /* Memory Region (for mp). */
-               uint32_t lkey; /* htonl(mr->lkey) */
-       } mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
-       uint16_t mr_cache_idx; /* Index of last hit entry. */
+       struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
        struct rte_mbuf *(*elts)[]; /* TX elements. */
        struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
@@ -553,20 +561,20 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
        uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
 
        assert(i < RTE_DIM(txq->mp2mr));
-       if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
-               return txq->mp2mr[i].lkey;
+       if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
+               return txq->mp2mr[i]->lkey;
        for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-               if (unlikely(txq->mp2mr[i].mr == NULL)) {
+               if (unlikely(txq->mp2mr[i] == NULL)) {
                        /* Unknown MP, add a new MR for it. */
                        break;
                }
-               if (txq->mp2mr[i].start <= addr &&
-                   txq->mp2mr[i].end >= addr) {
-                       assert(txq->mp2mr[i].lkey != (uint32_t)-1);
-                       assert(htonl(txq->mp2mr[i].mr->lkey) ==
-                              txq->mp2mr[i].lkey);
+               if (txq->mp2mr[i]->start <= addr &&
+                   txq->mp2mr[i]->end >= addr) {
+                       assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
+                       assert(htonl(txq->mp2mr[i]->mr->lkey) ==
+                              txq->mp2mr[i]->lkey);
                        txq->mr_cache_idx = i;
-                       return txq->mp2mr[i].lkey;
+                       return txq->mp2mr[i]->lkey;
                }
        }
        txq->mr_cache_idx = 0;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 3f6702a..ce826dd 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -140,11 +140,9 @@ mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
                claim_zero(ibv_destroy_qp(txq_ctrl->qp));
        if (txq_ctrl->cq != NULL)
                claim_zero(ibv_destroy_cq(txq_ctrl->cq));
-       for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-               if (txq_ctrl->txq.mp2mr[i].mr == NULL)
-                       break;
-               claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr));
-       }
+       for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i)
+               if (txq_ctrl->txq.mp2mr[i])
+                       priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]);
        memset(txq_ctrl, 0, sizeof(*txq_ctrl));
 }
 
-- 
2.1.4
