> On Apr 24, 2019, at 1:44 AM, Viacheslav Ovsiienko <viachesl...@mellanox.com> wrote:
> 
> Support for multiport Infiniband devices was introduced in [1].
> All active ports belonging to the same Infiniband device use the
> single shared Infiniband context of that device and share its
> resources:
>  - QPs are created within the shared context
>  - Verbs flows are created with an explicit port index
>  - DV/DR resources
>  - Protection Domain
>  - Event Handlers
> 
> This patchset adds support for sharing Memory Regions between the
> ports created on the base of a multiport Infiniband device.
> The mlx5 datapath uses a layered cache subsystem for allocating and
> releasing Memory Regions; for performance reasons, only the lowest
> layer (L3) is shared across ports.
> 
> [1] http://patches.dpdk.org/cover/51800/
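
The layered lookup described above works roughly as follows. This is a
minimal, self-contained sketch in which all type and function names
(mr_ctrl, global_mr_cache, mr_addr2lkey, ...) are hypothetical
simplifications for illustration, not the driver's actual definitions;
only the L3 layer takes the rwlock that this patchset moves into the
shared IB context:

/*
 * Simplified model of the layered MR cache: L0/L1 (and the per-queue
 * B-tree, omitted here) stay private to each port's queues; only L3 is
 * shared between the ports of one multiport Infiniband device.
 * All names are illustrative, not the real mlx5 structures.
 */
#include <stdint.h>
#include <pthread.h>

#define MR_CACHE_N 8
#define LKEY_INVALID UINT32_MAX

struct mr_cache_entry {
	uintptr_t start;
	uintptr_t end;
	uint32_t lkey;
};

/* L3: the only layer shared across ports (lives in the shared context). */
struct global_mr_cache {
	pthread_rwlock_t rwlock;
	/* ... global B-tree and registered/free MR lists would live here ... */
};

/* Per-queue control block: private linear cache plus a pointer to L3. */
struct mr_ctrl {
	uint16_t mru;                            /* L0: most-recently-used slot. */
	struct mr_cache_entry cache[MR_CACHE_N]; /* L1: small linear cache. */
	struct global_mr_cache *shared;          /* L3: per-device, shared. */
};

static uint32_t
global_lookup(struct global_mr_cache *gc, uintptr_t addr)
{
	uint32_t lkey = LKEY_INVALID;

	(void)addr;
	pthread_rwlock_rdlock(&gc->rwlock);
	/* Global B-tree lookup (linear MR list scan on overflow) goes here. */
	pthread_rwlock_unlock(&gc->rwlock);
	return lkey;
}

/* Datapath lookup: cheap private layers first, shared layer last. */
static uint32_t
mr_addr2lkey(struct mr_ctrl *ctrl, uintptr_t addr)
{
	struct mr_cache_entry *e = &ctrl->cache[ctrl->mru];

	if (addr >= e->start && addr < e->end)      /* L0: last entry hit. */
		return e->lkey;
	for (uint16_t i = 0; i < MR_CACHE_N; ++i) { /* L1: linear scan. */
		e = &ctrl->cache[i];
		if (addr >= e->start && addr < e->end) {
			ctrl->mru = i;
			return e->lkey;
		}
	}
	/* L2 (per-queue B-tree) would be searched here; omitted. */
	return global_lookup(ctrl->shared, addr);   /* L3: takes shared rwlock. */
}

The design point: the hot L0/L1 layers need no locking because they are
private per queue, so sharing only L3 keeps cross-port synchronization
off the fast path.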
> 
> Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>
> ---
Acked-by: Yongseok Koh <ys...@mellanox.com>

> v2:
>  - fixed indentation issues
>  - cleaned up comments
> 
> v1:
>  http://patches.dpdk.org/patch/52723/
> 
> drivers/net/mlx5/mlx5.c     |  40 +++++++----
> drivers/net/mlx5/mlx5.h     |  15 ++--
> drivers/net/mlx5/mlx5_mr.c  | 164 ++++++++++++++++++++++----------------------
> drivers/net/mlx5/mlx5_mr.h  |   5 +-
> drivers/net/mlx5/mlx5_txq.c |   2 +-
> 5 files changed, 121 insertions(+), 105 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index 1bb58b1..b563e0f 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -147,6 +147,7 @@ struct mlx5_dev_spawn_data {
>       struct mlx5_switch_info info; /**< Switch information. */
>       struct ibv_device *ibv_dev; /**< Associated IB device. */
>       struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */
> +     struct rte_pci_device *pci_dev; /**< Backend PCI device. */
> };
> 
> static LIST_HEAD(, mlx5_ibv_shared) mlx5_ibv_list = LIST_HEAD_INITIALIZER();
> @@ -225,6 +226,7 @@ struct mlx5_dev_spawn_data {
>               sizeof(sh->ibdev_name));
>       strncpy(sh->ibdev_path, sh->ctx->device->ibdev_path,
>               sizeof(sh->ibdev_path));
> +     sh->pci_dev = spawn->pci_dev;
>       pthread_mutex_init(&sh->intr_mutex, NULL);
>       /*
>        * Setting port_id to max unallowed value means
> @@ -239,6 +241,22 @@ struct mlx5_dev_spawn_data {
>               err = ENOMEM;
>               goto error;
>       }
> +     /*
> +      * Once the device is added to the list of memory event
> +      * callback, its global MR cache table cannot be expanded
> +      * on the fly because of deadlock. If it overflows, lookup
> +      * should be done by searching MR list linearly, which is slow.
> +      *
> +      * At this point the device is not added to the memory
> +      * event list yet, context is just being created.
> +      */
> +     err = mlx5_mr_btree_init(&sh->mr.cache,
> +                              MLX5_MR_BTREE_CACHE_N * 2,
> +                              sh->pci_dev->device.numa_node);
> +     if (err) {
> +             err = rte_errno;
> +             goto error;
> +     }
>       LIST_INSERT_HEAD(&mlx5_ibv_list, sh, next);
> exit:
>       pthread_mutex_unlock(&mlx5_ibv_list_mutex);
> @@ -286,6 +304,8 @@ struct mlx5_dev_spawn_data {
>       assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
>       if (--sh->refcnt)
>               goto exit;
> +     /* Release created Memory Regions. */
> +     mlx5_mr_release(sh);
>       LIST_REMOVE(sh, next);
>       /*
>        *  Ensure there is no async event handler installed.
> @@ -651,7 +671,10 @@ struct mlx5_dev_spawn_data {
>       }
>       mlx5_proc_priv_uninit(dev);
>       mlx5_mprq_free_mp(dev);
> -     mlx5_mr_release(dev);
> +     /* Remove from memory callback device list. */
> +     rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
> +     LIST_REMOVE(priv, mem_event_cb);
> +     rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
>       assert(priv->sh);
>       mlx5_free_shared_dr(priv);
>       if (priv->rss_conf.rss_key != NULL)
> @@ -1548,19 +1571,6 @@ struct mlx5_dev_spawn_data {
>               goto error;
>       }
>       priv->config.flow_prio = err;
> -     /*
> -      * Once the device is added to the list of memory event
> -      * callback, its global MR cache table cannot be expanded
> -      * on the fly because of deadlock. If it overflows, lookup
> -      * should be done by searching MR list linearly, which is slow.
> -      */
> -     err = mlx5_mr_btree_init(&priv->mr.cache,
> -                              MLX5_MR_BTREE_CACHE_N * 2,
> -                              eth_dev->device->numa_node);
> -     if (err) {
> -             err = rte_errno;
> -             goto error;
> -     }
>       /* Add device to memory callback list. */
>       rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
>       LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list,
> @@ -1757,6 +1767,7 @@ struct mlx5_dev_spawn_data {
>                       list[ns].ibv_port = i;
>                       list[ns].ibv_dev = ibv_match[0];
>                       list[ns].eth_dev = NULL;
> +                     list[ns].pci_dev = pci_dev;
>                       list[ns].ifindex = mlx5_nl_ifindex
>                                       (nl_rdma, list[ns].ibv_dev->name, i);
>                       if (!list[ns].ifindex) {
> @@ -1823,6 +1834,7 @@ struct mlx5_dev_spawn_data {
>                       list[ns].ibv_port = 1;
>                       list[ns].ibv_dev = ibv_match[i];
>                       list[ns].eth_dev = NULL;
> +                     list[ns].pci_dev = pci_dev;
>                       list[ns].ifindex = 0;
>                       if (nl_rdma >= 0)
>                               list[ns].ifindex = mlx5_nl_ifindex
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 0a6d7f1..2575732 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -275,6 +275,14 @@ struct mlx5_ibv_shared {
>       char ibdev_name[IBV_SYSFS_NAME_MAX]; /* IB device name. */
>       char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
>       struct ibv_device_attr_ex device_attr; /* Device properties. */
> +     struct rte_pci_device *pci_dev; /* Backend PCI device. */
> +     struct {
> +             uint32_t dev_gen; /* Generation number to flush local caches. */
> +             rte_rwlock_t rwlock; /* MR Lock. */
> +             struct mlx5_mr_btree cache; /* Global MR cache table. */
> +             struct mlx5_mr_list mr_list; /* Registered MR list. */
> +             struct mlx5_mr_list mr_free_list; /* Freed MR list. */
> +     } mr;
>       /* Shared DV/DR flow data section. */
>       pthread_mutex_t dv_mutex; /* DV context mutex. */
>       uint32_t dv_refcnt; /* DV/DR data reference counter. */
> @@ -347,13 +355,6 @@ struct mlx5_priv {
>       struct mlx5_flows ctrl_flows; /* Control flow rules. */
>       LIST_HEAD(counters, mlx5_flow_counter) flow_counters;
>       /* Flow counters. */
> -     struct {
> -             uint32_t dev_gen; /* Generation number to flush local caches. */
> -             rte_rwlock_t rwlock; /* MR Lock. */
> -             struct mlx5_mr_btree cache; /* Global MR cache table. */
> -             struct mlx5_mr_list mr_list; /* Registered MR list. */
> -             struct mlx5_mr_list mr_free_list; /* Freed MR list. */
> -     } mr;
>       LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
>       LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
>       LIST_HEAD(hrxq, mlx5_hrxq) hrxqs; /* Verbs Hash Rx queues. */
> diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
> index e4957a4..a7a63b1 100644
> --- a/drivers/net/mlx5/mlx5_mr.c
> +++ b/drivers/net/mlx5/mlx5_mr.c
> @@ -36,7 +36,7 @@ struct mr_update_mp_data {
> 
> /**
>  * Expand B-tree table to a given size. Can't be called with holding
> - * memory_hotplug_lock or priv->mr.rwlock due to rte_realloc().
> + * memory_hotplug_lock or sh->mr.rwlock due to rte_realloc().
>  *
>  * @param bt
>  *   Pointer to B-tree structure.
> @@ -350,7 +350,7 @@ struct mr_update_mp_data {
>               n = mr_find_next_chunk(mr, &entry, n);
>               if (!entry.end)
>                       break;
> -             if (mr_btree_insert(&priv->mr.cache, &entry) < 0) {
> +             if (mr_btree_insert(&priv->sh->mr.cache, &entry) < 0) {
>                       /*
>                        * Overflowed, but the global table cannot be expanded
>                        * because of deadlock.
> @@ -382,7 +382,7 @@ struct mr_update_mp_data {
>       struct mlx5_mr *mr;
> 
>       /* Iterate all the existing MRs. */
> -     LIST_FOREACH(mr, &priv->mr.mr_list, mr) {
> +     LIST_FOREACH(mr, &priv->sh->mr.mr_list, mr) {
>               unsigned int n;
> 
>               if (mr->ms_n == 0)
> @@ -420,6 +420,7 @@ struct mr_update_mp_data {
>             uintptr_t addr)
> {
>       struct mlx5_priv *priv = dev->data->dev_private;
> +     struct mlx5_ibv_shared *sh = priv->sh;
>       uint16_t idx;
>       uint32_t lkey = UINT32_MAX;
>       struct mlx5_mr *mr;
> @@ -430,10 +431,10 @@ struct mr_update_mp_data {
>        * has to be searched by traversing the original MR list instead, which
>        * is very slow path. Otherwise, the global cache is all inclusive.
>        */
> -     if (!unlikely(priv->mr.cache.overflow)) {
> -             lkey = mr_btree_lookup(&priv->mr.cache, &idx, addr);
> +     if (!unlikely(sh->mr.cache.overflow)) {
> +             lkey = mr_btree_lookup(&sh->mr.cache, &idx, addr);
>               if (lkey != UINT32_MAX)
> -                     *entry = (*priv->mr.cache.table)[idx];
> +                     *entry = (*sh->mr.cache.table)[idx];
>       } else {
>               /* Falling back to the slowest path. */
>               mr = mr_lookup_dev_list(dev, entry, addr);
> @@ -468,13 +469,12 @@ struct mr_update_mp_data {
> /**
>  * Release resources of detached MR having no online entry.
>  *
> - * @param dev
> - *   Pointer to Ethernet device.
> + * @param sh
> + *   Pointer to Ethernet device shared context.
>  */
> static void
> -mlx5_mr_garbage_collect(struct rte_eth_dev *dev)
> +mlx5_mr_garbage_collect(struct mlx5_ibv_shared *sh)
> {
> -     struct mlx5_priv *priv = dev->data->dev_private;
>       struct mlx5_mr *mr_next;
>       struct mlx5_mr_list free_list = LIST_HEAD_INITIALIZER(free_list);
> 
> @@ -484,11 +484,11 @@ struct mr_update_mp_data {
>        * MR can't be freed with holding the lock because rte_free() could call
>        * memory free callback function. This will be a deadlock situation.
>        */
> -     rte_rwlock_write_lock(&priv->mr.rwlock);
> +     rte_rwlock_write_lock(&sh->mr.rwlock);
>       /* Detach the whole free list and release it after unlocking. */
> -     free_list = priv->mr.mr_free_list;
> -     LIST_INIT(&priv->mr.mr_free_list);
> -     rte_rwlock_write_unlock(&priv->mr.rwlock);
> +     free_list = sh->mr.mr_free_list;
> +     LIST_INIT(&sh->mr.mr_free_list);
> +     rte_rwlock_write_unlock(&sh->mr.rwlock);
>       /* Release resources. */
>       mr_next = LIST_FIRST(&free_list);
>       while (mr_next != NULL) {
> @@ -548,12 +548,12 @@ struct mr_update_mp_data {
>                     dev->data->port_id, (void *)addr);
>               return UINT32_MAX;
>       }
> -     rte_rwlock_read_lock(&priv->mr.rwlock);
> +     rte_rwlock_read_lock(&priv->sh->mr.rwlock);
>       /* Fill in output data. */
>       mr_lookup_dev(dev, entry, addr);
>       /* Lookup can't fail. */
>       assert(entry->lkey != UINT32_MAX);
> -     rte_rwlock_read_unlock(&priv->mr.rwlock);
> +     rte_rwlock_read_unlock(&priv->sh->mr.rwlock);
>       DEBUG("port %u MR CREATED by primary process for %p:\n"
>             "  [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x",
>             dev->data->port_id, (void *)addr,
> @@ -582,6 +582,7 @@ struct mr_update_mp_data {
>                      uintptr_t addr)
> {
>       struct mlx5_priv *priv = dev->data->dev_private;
> +     struct mlx5_ibv_shared *sh = priv->sh;
>       struct mlx5_dev_config *config = &priv->config;
>       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
>       const struct rte_memseg_list *msl;
> @@ -602,12 +603,12 @@ struct mr_update_mp_data {
>               dev->data->port_id, (void *)addr);
>       /*
>        * Release detached MRs if any. This can't be called with holding either
> -      * memory_hotplug_lock or priv->mr.rwlock. MRs on the free list have
> +      * memory_hotplug_lock or sh->mr.rwlock. MRs on the free list have
>        * been detached by the memory free event but it couldn't be released
>        * inside the callback due to deadlock. As a result, releasing resources
>        * is quite opportunistic.
>        */
> -     mlx5_mr_garbage_collect(dev);
> +     mlx5_mr_garbage_collect(sh);
>       /*
>        * If enabled, find out a contiguous virtual address chunk in use, to
>        * which the given address belongs, in order to register maximum range.
> @@ -710,7 +711,7 @@ struct mr_update_mp_data {
>               goto alloc_resources;
>       }
>       assert(data.msl == data_re.msl);
> -     rte_rwlock_write_lock(&priv->mr.rwlock);
> +     rte_rwlock_write_lock(&sh->mr.rwlock);
>       /*
>        * Check the address is really missing. If other thread already created
>        * one or it is not found due to overflow, abort and return.
> @@ -721,10 +722,10 @@ struct mr_update_mp_data {
>                * low-on-memory. Then, this entry will have to be searched
>                * here again.
>                */
> -             mr_btree_insert(&priv->mr.cache, entry);
> +             mr_btree_insert(&sh->mr.cache, entry);
>               DEBUG("port %u found MR for %p on final lookup, abort",
>                     dev->data->port_id, (void *)addr);
> -             rte_rwlock_write_unlock(&priv->mr.rwlock);
> +             rte_rwlock_write_unlock(&sh->mr.rwlock);
>               rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
>               /*
>                * Must be unlocked before calling rte_free() because
> @@ -769,7 +770,7 @@ struct mr_update_mp_data {
>        * mlx5_alloc_buf_extern() which eventually calls rte_malloc_socket()
>        * through mlx5_alloc_verbs_buf().
>        */
> -     mr->ibv_mr = mlx5_glue->reg_mr(priv->sh->pd, (void *)data.start, len,
> +     mr->ibv_mr = mlx5_glue->reg_mr(sh->pd, (void *)data.start, len,
>                                      IBV_ACCESS_LOCAL_WRITE);
>       if (mr->ibv_mr == NULL) {
>               DEBUG("port %u fail to create a verbs MR for address (%p)",
> @@ -779,7 +780,7 @@ struct mr_update_mp_data {
>       }
>       assert((uintptr_t)mr->ibv_mr->addr == data.start);
>       assert(mr->ibv_mr->length == len);
> -     LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
> +     LIST_INSERT_HEAD(&sh->mr.mr_list, mr, mr);
>       DEBUG("port %u MR CREATED (%p) for %p:\n"
>             "  [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
>             " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
> @@ -792,11 +793,11 @@ struct mr_update_mp_data {
>       mr_lookup_dev(dev, entry, addr);
>       /* Lookup can't fail. */
>       assert(entry->lkey != UINT32_MAX);
> -     rte_rwlock_write_unlock(&priv->mr.rwlock);
> +     rte_rwlock_write_unlock(&sh->mr.rwlock);
>       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
>       return entry->lkey;
> err_mrlock:
> -     rte_rwlock_write_unlock(&priv->mr.rwlock);
> +     rte_rwlock_write_unlock(&sh->mr.rwlock);
> err_memlock:
>       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
> err_nolock:
> @@ -854,14 +855,15 @@ struct mr_update_mp_data {
> mr_rebuild_dev_cache(struct rte_eth_dev *dev)
> {
>       struct mlx5_priv *priv = dev->data->dev_private;
> +     struct mlx5_ibv_shared *sh = priv->sh;
>       struct mlx5_mr *mr;
> 
>       DRV_LOG(DEBUG, "port %u rebuild dev cache[]", dev->data->port_id);
>       /* Flush cache to rebuild. */
> -     priv->mr.cache.len = 1;
> -     priv->mr.cache.overflow = 0;
> +     sh->mr.cache.len = 1;
> +     sh->mr.cache.overflow = 0;
>       /* Iterate all the existing MRs. */
> -     LIST_FOREACH(mr, &priv->mr.mr_list, mr)
> +     LIST_FOREACH(mr, &sh->mr.mr_list, mr)
>               if (mr_insert_dev_cache(dev, mr) < 0)
>                       return;
> }
> @@ -888,6 +890,7 @@ struct mr_update_mp_data {
> mlx5_mr_mem_event_free_cb(struct rte_eth_dev *dev, const void *addr, size_t len)
> {
>       struct mlx5_priv *priv = dev->data->dev_private;
> +     struct mlx5_ibv_shared *sh = priv->sh;
>       const struct rte_memseg_list *msl;
>       struct mlx5_mr *mr;
>       int ms_n;
> @@ -901,7 +904,7 @@ struct mr_update_mp_data {
>       assert((uintptr_t)addr == RTE_ALIGN((uintptr_t)addr, msl->page_sz));
>       assert(len == RTE_ALIGN(len, msl->page_sz));
>       ms_n = len / msl->page_sz;
> -     rte_rwlock_write_lock(&priv->mr.rwlock);
> +     rte_rwlock_write_lock(&sh->mr.rwlock);
>       /* Clear bits of freed memsegs from MR. */
>       for (i = 0; i < ms_n; ++i) {
>               const struct rte_memseg *ms;
> @@ -928,7 +931,7 @@ struct mr_update_mp_data {
>               rte_bitmap_clear(mr->ms_bmp, pos);
>               if (--mr->ms_n == 0) {
>                       LIST_REMOVE(mr, mr);
> -                     LIST_INSERT_HEAD(&priv->mr.mr_free_list, mr, mr);
> +                     LIST_INSERT_HEAD(&sh->mr.mr_free_list, mr, mr);
>                       DEBUG("port %u remove MR(%p) from list",
>                             dev->data->port_id, (void *)mr);
>               }
> @@ -949,12 +952,12 @@ struct mr_update_mp_data {
>                * generation below) will be guaranteed to be seen by other core
>                * before the core sees the newly allocated memory.
>                */
> -             ++priv->mr.dev_gen;
> +             ++sh->mr.dev_gen;
>               DEBUG("broadcasting local cache flush, gen=%d",
> -                   priv->mr.dev_gen);
> +                   sh->mr.dev_gen);
>               rte_smp_wmb();
>       }
> -     rte_rwlock_write_unlock(&priv->mr.rwlock);
> +     rte_rwlock_write_unlock(&sh->mr.rwlock);
> }
> 
> /**
> @@ -1013,6 +1016,7 @@ struct mr_update_mp_data {
>                  struct mlx5_mr_cache *entry, uintptr_t addr)
> {
>       struct mlx5_priv *priv = dev->data->dev_private;
> +     struct mlx5_ibv_shared *sh = priv->sh;
>       struct mlx5_mr_btree *bt = &mr_ctrl->cache_bh;
>       uint16_t idx;
>       uint32_t lkey;
> @@ -1021,12 +1025,12 @@ struct mr_update_mp_data {
>       if (unlikely(bt->len == bt->size))
>               mr_btree_expand(bt, bt->size << 1);
>       /* Look up in the global cache. */
> -     rte_rwlock_read_lock(&priv->mr.rwlock);
> -     lkey = mr_btree_lookup(&priv->mr.cache, &idx, addr);
> +     rte_rwlock_read_lock(&sh->mr.rwlock);
> +     lkey = mr_btree_lookup(&sh->mr.cache, &idx, addr);
>       if (lkey != UINT32_MAX) {
>               /* Found. */
> -             *entry = (*priv->mr.cache.table)[idx];
> -             rte_rwlock_read_unlock(&priv->mr.rwlock);
> +             *entry = (*sh->mr.cache.table)[idx];
> +             rte_rwlock_read_unlock(&sh->mr.rwlock);
>               /*
>                * Update local cache. Even if it fails, return the found entry
>                * to update top-half cache. Next time, this entry will be found
> @@ -1035,7 +1039,7 @@ struct mr_update_mp_data {
>               mr_btree_insert(bt, entry);
>               return lkey;
>       }
> -     rte_rwlock_read_unlock(&priv->mr.rwlock);
> +     rte_rwlock_read_unlock(&sh->mr.rwlock);
>       /* First time to see the address? Create a new MR. */
>       lkey = mlx5_mr_create(dev, entry, addr);
>       /*
> @@ -1261,6 +1265,7 @@ struct mr_update_mp_data {
>       struct mr_update_mp_data *data = opaque;
>       struct rte_eth_dev *dev = data->dev;
>       struct mlx5_priv *priv = dev->data->dev_private;
> +     struct mlx5_ibv_shared *sh = priv->sh;
>       struct mlx5_mr_ctrl *mr_ctrl = data->mr_ctrl;
>       struct mlx5_mr *mr = NULL;
>       uintptr_t addr = (uintptr_t)memhdr->addr;
> @@ -1270,9 +1275,9 @@ struct mr_update_mp_data {
> 
>       assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
>       /* If already registered, it should return. */
> -     rte_rwlock_read_lock(&priv->mr.rwlock);
> +     rte_rwlock_read_lock(&sh->mr.rwlock);
>       lkey = mr_lookup_dev(dev, &entry, addr);
> -     rte_rwlock_read_unlock(&priv->mr.rwlock);
> +     rte_rwlock_read_unlock(&sh->mr.rwlock);
>       if (lkey != UINT32_MAX)
>               return;
>       DRV_LOG(DEBUG, "port %u register MR for chunk #%d of mempool (%s)",
> @@ -1286,11 +1291,11 @@ struct mr_update_mp_data {
>               data->ret = -1;
>               return;
>       }
> -     rte_rwlock_write_lock(&priv->mr.rwlock);
> -     LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
> +     rte_rwlock_write_lock(&sh->mr.rwlock);
> +     LIST_INSERT_HEAD(&sh->mr.mr_list, mr, mr);
>       /* Insert to the global cache table. */
>       mr_insert_dev_cache(dev, mr);
> -     rte_rwlock_write_unlock(&priv->mr.rwlock);
> +     rte_rwlock_write_unlock(&sh->mr.rwlock);
>       /* Insert to the local cache table */
>       mlx5_mr_addr2mr_bh(dev, mr_ctrl, addr);
> }
> @@ -1339,6 +1344,7 @@ struct mr_update_mp_data {
>       struct rte_eth_dev *dev;
>       struct mlx5_mr *mr;
>       struct mlx5_priv *priv;
> +     struct mlx5_ibv_shared *sh;
> 
>       dev = pci_dev_to_eth_dev(pdev);
>       if (!dev) {
> @@ -1355,11 +1361,12 @@ struct mr_update_mp_data {
>               rte_errno = EINVAL;
>               return -1;
>       }
> -     rte_rwlock_write_lock(&priv->mr.rwlock);
> -     LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
> +     sh = priv->sh;
> +     rte_rwlock_write_lock(&sh->mr.rwlock);
> +     LIST_INSERT_HEAD(&sh->mr.mr_list, mr, mr);
>       /* Insert to the global cache table. */
>       mr_insert_dev_cache(dev, mr);
> -     rte_rwlock_write_unlock(&priv->mr.rwlock);
> +     rte_rwlock_write_unlock(&sh->mr.rwlock);
>       return 0;
> }
> 
> @@ -1384,6 +1391,7 @@ struct mr_update_mp_data {
> {
>       struct rte_eth_dev *dev;
>       struct mlx5_priv *priv;
> +     struct mlx5_ibv_shared *sh;
>       struct mlx5_mr *mr;
>       struct mlx5_mr_cache entry;
> 
> @@ -1395,10 +1403,11 @@ struct mr_update_mp_data {
>               return -1;
>       }
>       priv = dev->data->dev_private;
> -     rte_rwlock_read_lock(&priv->mr.rwlock);
> +     sh = priv->sh;
> +     rte_rwlock_read_lock(&sh->mr.rwlock);
>       mr = mr_lookup_dev_list(dev, &entry, (uintptr_t)addr);
>       if (!mr) {
> -             rte_rwlock_read_unlock(&priv->mr.rwlock);
> +             rte_rwlock_read_unlock(&sh->mr.rwlock);
>               DRV_LOG(WARNING, "address 0x%" PRIxPTR " wasn't registered "
>                                "to PCI device %p", (uintptr_t)addr,
>                                (void *)pdev);
> @@ -1406,7 +1415,7 @@ struct mr_update_mp_data {
>               return -1;
>       }
>       LIST_REMOVE(mr, mr);
> -     LIST_INSERT_HEAD(&priv->mr.mr_free_list, mr, mr);
> +     LIST_INSERT_HEAD(&sh->mr.mr_free_list, mr, mr);
>       DEBUG("port %u remove MR(%p) from list", dev->data->port_id,
>             (void *)mr);
>       mr_rebuild_dev_cache(dev);
> @@ -1419,11 +1428,10 @@ struct mr_update_mp_data {
>        * generation below) will be guaranteed to be seen by other core
>        * before the core sees the newly allocated memory.
>        */
> -     ++priv->mr.dev_gen;
> -     DEBUG("broadcasting local cache flush, gen=%d",
> -                     priv->mr.dev_gen);
> +     ++sh->mr.dev_gen;
> +     DEBUG("broadcasting local cache flush, gen=%d", sh->mr.dev_gen);
>       rte_smp_wmb();
> -     rte_rwlock_read_unlock(&priv->mr.rwlock);
> +     rte_rwlock_read_unlock(&sh->mr.rwlock);
>       return 0;
> }
> 
> @@ -1544,25 +1552,24 @@ struct mr_update_mp_data {
> /**
>  * Dump all the created MRs and the global cache entries.
>  *
> - * @param dev
> - *   Pointer to Ethernet device.
> + * @param sh
> + *   Pointer to Ethernet device shared context.
>  */
> void
> -mlx5_mr_dump_dev(struct rte_eth_dev *dev __rte_unused)
> +mlx5_mr_dump_dev(struct mlx5_ibv_shared *sh __rte_unused)
> {
> #ifndef NDEBUG
> -     struct mlx5_priv *priv = dev->data->dev_private;
>       struct mlx5_mr *mr;
>       int mr_n = 0;
>       int chunk_n = 0;
> 
> -     rte_rwlock_read_lock(&priv->mr.rwlock);
> +     rte_rwlock_read_lock(&sh->mr.rwlock);
>       /* Iterate all the existing MRs. */
> -     LIST_FOREACH(mr, &priv->mr.mr_list, mr) {
> +     LIST_FOREACH(mr, &sh->mr.mr_list, mr) {
>               unsigned int n;
> 
> -             DEBUG("port %u MR[%u], LKey = 0x%x, ms_n = %u, ms_bmp_n = %u",
> -                   dev->data->port_id, mr_n++,
> +             DEBUG("device %s MR[%u], LKey = 0x%x, ms_n = %u, ms_bmp_n = %u",
> +                   sh->ibdev_name, mr_n++,
>                     rte_cpu_to_be_32(mr->ibv_mr->lkey),
>                     mr->ms_n, mr->ms_bmp_n);
>               if (mr->ms_n == 0)
> @@ -1577,45 +1584,40 @@ struct mr_update_mp_data {
>                             chunk_n++, ret.start, ret.end);
>               }
>       }
> -     DEBUG("port %u dumping global cache", dev->data->port_id);
> -     mlx5_mr_btree_dump(&priv->mr.cache);
> -     rte_rwlock_read_unlock(&priv->mr.rwlock);
> +     DEBUG("device %s dumping global cache", sh->ibdev_name);
> +     mlx5_mr_btree_dump(&sh->mr.cache);
> +     rte_rwlock_read_unlock(&sh->mr.rwlock);
> #endif
> }
> 
> /**
> - * Release all the created MRs and resources. Remove device from memory
> - * callback list.
> + * Release all the created MRs and resources for shared device context.
>  *
> - * @param dev
> - *   Pointer to Ethernet device.
> + * @param sh
> + *   Pointer to Ethernet device shared context.
>  */
> void
> -mlx5_mr_release(struct rte_eth_dev *dev)
> +mlx5_mr_release(struct mlx5_ibv_shared *sh)
> {
> -     struct mlx5_priv *priv = dev->data->dev_private;
>       struct mlx5_mr *mr_next;
> 
> -     /* Remove from memory callback device list. */
> -     rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
> -     LIST_REMOVE(priv, mem_event_cb);
> -     rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
>       if (rte_log_get_level(mlx5_logtype) == RTE_LOG_DEBUG)
> -             mlx5_mr_dump_dev(dev);
> -     rte_rwlock_write_lock(&priv->mr.rwlock);
> +             mlx5_mr_dump_dev(sh);
> +     rte_rwlock_write_lock(&sh->mr.rwlock);
>       /* Detach from MR list and move to free list. */
> -     mr_next = LIST_FIRST(&priv->mr.mr_list);
> +     mr_next = LIST_FIRST(&sh->mr.mr_list);
>       while (mr_next != NULL) {
>               struct mlx5_mr *mr = mr_next;
> 
>               mr_next = LIST_NEXT(mr, mr);
>               LIST_REMOVE(mr, mr);
> -             LIST_INSERT_HEAD(&priv->mr.mr_free_list, mr, mr);
> +             LIST_INSERT_HEAD(&sh->mr.mr_free_list, mr, mr);
>       }
> -     LIST_INIT(&priv->mr.mr_list);
> +     LIST_INIT(&sh->mr.mr_list);
>       /* Free global cache. */
> -     mlx5_mr_btree_free(&priv->mr.cache);
> -     rte_rwlock_write_unlock(&priv->mr.rwlock);
> +     mlx5_mr_btree_free(&sh->mr.cache);
> +     rte_rwlock_write_unlock(&sh->mr.rwlock);
>       /* Free all remaining MRs. */
> -     mlx5_mr_garbage_collect(dev);
> +     mlx5_mr_garbage_collect(sh);
> }
> diff --git a/drivers/net/mlx5/mlx5_mr.h b/drivers/net/mlx5/mlx5_mr.h
> index 786f6a3..89e89b7 100644
> --- a/drivers/net/mlx5/mlx5_mr.h
> +++ b/drivers/net/mlx5/mlx5_mr.h
> @@ -62,6 +62,7 @@ struct mlx5_mr_ctrl {
>       struct mlx5_mr_btree cache_bh; /* Cache for bottom-half. */
> } __rte_packed;
> 
> +struct mlx5_ibv_shared;
> extern struct mlx5_dev_list  mlx5_mem_event_cb_list;
> extern rte_rwlock_t mlx5_mem_event_rwlock;
> 
> @@ -76,11 +77,11 @@ void mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
>                         size_t len, void *arg);
> int mlx5_mr_update_mp(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
>                     struct rte_mempool *mp);
> -void mlx5_mr_release(struct rte_eth_dev *dev);
> +void mlx5_mr_release(struct mlx5_ibv_shared *sh);
> 
> /* Debug purpose functions. */
> void mlx5_mr_btree_dump(struct mlx5_mr_btree *bt);
> -void mlx5_mr_dump_dev(struct rte_eth_dev *dev);
> +void mlx5_mr_dump_dev(struct mlx5_ibv_shared *sh);
> 
> /**
>  * Look up LKey from given lookup table by linear search. Firstly look up the
> diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
> index dbe074f..4d55fd4 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -814,7 +814,7 @@ struct mlx5_txq_ctrl *
>               goto error;
>       }
>       /* Save pointer of global generation number to check memory event. */
> -     tmpl->txq.mr_ctrl.dev_gen_ptr = &priv->mr.dev_gen;
> +     tmpl->txq.mr_ctrl.dev_gen_ptr = &priv->sh->mr.dev_gen;
>       assert(desc > MLX5_TX_COMP_THRESH);
>       tmpl->txq.offloads = conf->offloads |
>                            dev->data->dev_conf.txmode.offloads;
> -- 
> 1.8.3.1
> 
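The dev_gen increment plus rte_smp_wmb() seen in several hunks above
implements a simple generation-number flush: the control path bumps a
shared counter after removing an MR, and each queue compares its cached
generation against the pointer saved at queue setup (see the mlx5_txq.c
hunk). A minimal standalone sketch, with hypothetical names and a GCC
builtin standing in for the DPDK barrier:

#include <stdint.h>

/* Shared per-device state (the patch moves this into mlx5_ibv_shared). */
struct dev_mr_state {
	uint32_t dev_gen;          /* bumped whenever the global cache shrinks */
};

/* Per-queue state; dev_gen_ptr is saved once at queue setup. */
struct queue_mr_ctrl {
	uint32_t cur_gen;          /* generation the local caches reflect */
	volatile uint32_t *dev_gen_ptr;
};

/* Control path: called under the MR write lock after removing an MR. */
static void
broadcast_flush(struct dev_mr_state *s)
{
	++s->dev_gen;
	__sync_synchronize();      /* stand-in for rte_smp_wmb() */
}

/* Datapath: check before trusting the local (per-queue) caches. */
static int
local_cache_is_stale(struct queue_mr_ctrl *q)
{
	uint32_t gen = *q->dev_gen_ptr;

	if (q->cur_gen == gen)
		return 0;
	/* Flush the L0/L1 linear cache and per-queue B-tree here, then: */
	q->cur_gen = gen;
	return 1;
}

The write barrier guarantees that the cache modifications are visible to
other cores before they observe the new generation number, so a queue
that sees a stale generation never uses a stale lkey.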
