Hi Koh,

See my comments below, same comments apply for mlx4 patch. 

Tuesday, April 2, 2019 12:22 AM, Yongseok Koh:
> Subject: [dpdk-dev] [PATCH v2 2/3] net/mlx5: remove device register remap
> 
> UAR (User Access Region) registers will be stored in a process-local table and
> a process accesses a register in a table entry with index. Alloc/free of table
> entry is managed by a global bitmap.
> 
> When there's a need to store a UAR register such as Tx BlueFlame register
> for doorbell, an index should be allocated by mlx5_uar_alloc_index() and
> address of the allocated table entry must be acquired by
> mlx5_uar_get_addr_ptr() so that the table can be expanded if overflowed.
> The local UAR register table doesn't cover all the indexes in the bitmap.
> This will be expanded if more indexes are allocated than the current size of
> the table.
> 
> For example, the BlueFlame register for Tx doorbell has to be remapped on
> each secondary process. On initialization, primary process allocates an index
> for the UAR register table and stores the register address in the indexed
> entry of its own table when configuring a Tx queue. The index is stored in the
> shared memory(txq->bfreg_idx) and visiable to secondary processes. As
> secondary processes know the index, each process stores remapped register
> in the same indexed entry of its local UAR register table.
> 
> On the datapath of each process, the register can be referenced simply by
> MLX5_UAR_REG(idx) which accesses its local UAR register table by the index.
> 
> Signed-off-by: Yongseok Koh <ys...@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c         | 262 +++++++++++++++++++++++++++--------
> -----
>  drivers/net/mlx5/mlx5.h         |  19 ++-
>  drivers/net/mlx5/mlx5_rxtx.h    |   8 +-
>  drivers/net/mlx5/mlx5_trigger.c |   2 +-
>  drivers/net/mlx5/mlx5_txq.c     |  96 +++++++--------
>  5 files changed, 242 insertions(+), 145 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> 40445056f5..103841b2bc 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -25,6 +25,7 @@
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
> 
> +#include <rte_bitmap.h>
>  #include <rte_malloc.h>
>  #include <rte_ethdev_driver.h>
>  #include <rte_ethdev_pci.h>
> @@ -131,7 +132,7 @@ struct mlx5_shared_data *mlx5_shared_data;  static
> rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;
> 
>  /* Process local data for secondary processes. */ -static struct
> mlx5_local_data mlx5_local_data;
> +struct mlx5_local_data mlx5_local_data;
> 
>  /** Driver-specific log messages type. */  int mlx5_logtype; @@ -810,130
> +811,225 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs
> *devargs)
> 
>  static struct rte_pci_driver mlx5_driver;
> 
> +
> +/**
> + * Expand the local UAR register table.
> + *
> + * @param size
> + *   Size of the table to be expanded
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
>  static int
> -find_lower_va_bound(const struct rte_memseg_list *msl,
> -             const struct rte_memseg *ms, void *arg)
> +uar_expand_table(uint32_t size)

This function needs to be protected w/ mutex. Since it can be called by 
multiple control threads on different eth devices. 

>  {
> -     void **addr = arg;
> +     struct mlx5_local_data *ld = &mlx5_local_data;
> +     void *mem;
> +     size_t tbl_sz = ld->uar_table_sz;
> 
> -     if (msl->external)
> +     if (size <= tbl_sz)
>               return 0;
> -     if (*addr == NULL)
> -             *addr = ms->addr;
> -     else
> -             *addr = RTE_MIN(*addr, ms->addr);
> -
> +     tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE);
> +     mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *),
> +                       RTE_CACHE_LINE_SIZE);
> +     if (!mem) {
> +             rte_errno = ENOMEM;
> +             DRV_LOG(ERR, "failed to expand uar table");
> +             return -rte_errno;
> +     }
> +     DRV_LOG(DEBUG, "UAR reg. table is expanded to %zu", tbl_sz);
> +     ld->uar_table = mem;
> +     ld->uar_table_sz = tbl_sz;
>       return 0;
>  }
> 
>  /**
> - * Reserve UAR address space for primary process.
> + * Return the pointer of the indexed slot in the local UAR register table.
>   *
> - * Process local resource is used by both primary and secondary to avoid
> - * duplicate reservation. The space has to be available on both primary and
> - * secondary process, TXQ UAR maps to this area using fixed mmap w/o
> double
> - * check.
> + * The indexed slot must be allocated by mlx5_uar_alloc_index() in
> + advance. And
> + * the table will be expanded if overflowed.
> + *
> + * @param idx
> + *   Index of the table.
>   *
>   * @return
> - *   0 on success, a negative errno value otherwise and rte_errno is set.
> + *   Pointer of table entry on success, NULL otherwise and rte_errno is set.
>   */
> -static int
> -mlx5_uar_init_primary(void)
> +void **
> +mlx5_uar_get_addr_ptr(uint32_t idx)

Wondering if we can possibly have coherency issue here.
Suppose we have 2 eth devices. One is doing datapath, one is at configuration 
stage. 
The one on configuration stage may trigger the expand
The one on datapath may read from the UAR table.

> +{
> +     struct mlx5_local_data *ld = &mlx5_local_data;
> +     int ret;
> +
> +     assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
> +     if (idx >= ld->uar_table_sz) {
> +             ret = uar_expand_table(idx + 1);
> +             if (ret)
> +                     return NULL;
> +     }
> +     return &(*ld->uar_table)[idx];
> +}
> +
> +/**
> + * Allocate a slot of UAR register table.
> + *
> + * Allocation is done by scanning the global bitmap. The global
> +spinlock should
> + * be held.
> + *
> + * @return
> + *   Index of a free slot on success, a negative errno value otherwise and
> + *   rte_errno is set.
> + */
> +uint32_t
> +mlx5_uar_alloc_index(void)
>  {
>       struct mlx5_shared_data *sd = mlx5_shared_data;
> -     void *addr = (void *)0;
> +     uint32_t idx = 0;
> +     uint64_t slab = 0;
> +     int ret;
> 
> -     if (sd->uar_base)
> -             return 0;
> -     /* find out lower bound of hugepage segments */
> -     rte_memseg_walk(find_lower_va_bound, &addr);
> -     /* keep distance to hugepages to minimize potential conflicts. */
> -     addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET +
> MLX5_UAR_SIZE));
> -     /* anonymous mmap, no real memory consumption. */
> -     addr = mmap(addr, MLX5_UAR_SIZE,
> -                 PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> -     if (addr == MAP_FAILED) {
> -             DRV_LOG(ERR,
> -                     "Failed to reserve UAR address space, please"
> -                     " adjust MLX5_UAR_SIZE or try --base-virtaddr");
> -             rte_errno = ENOMEM;
> +     assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +     rte_spinlock_lock(&sd->lock);
> +     __rte_bitmap_scan_init(sd->uar_bmp);
> +     ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab);
> +     if (unlikely(!ret)) {
> +             /*
> +              * This cannot happen unless there are unreasonably large
> number
> +              * of queues and ports.
> +              */
> +             rte_errno = ENOSPC;
> +             rte_spinlock_unlock(&sd->lock);
>               return -rte_errno;
>       }
> -     /* Accept either same addr or a new addr returned from mmap if
> target
> -      * range occupied.
> -      */
> -     DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
> -     sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */
> -     return 0;
> +     idx += __builtin_ctzll(slab);
> +     /* Mark the slot is occupied. */
> +     rte_bitmap_clear(sd->uar_bmp, idx);
> +     rte_spinlock_unlock(&sd->lock);
> +     DRV_LOG(DEBUG, "index %d is allocated in UAR reg. table", idx);
> +     return idx;
>  }
> 
>  /**
> - * Unmap UAR address space reserved for primary process.
> + * Free a slot of UAR register table.
>   */
> -static void
> -mlx5_uar_uninit_primary(void)
> +void
> +mlx5_uar_free_index(uint32_t idx)
>  {
>       struct mlx5_shared_data *sd = mlx5_shared_data;
> 
> -     if (!sd->uar_base)
> -             return;
> -     munmap(sd->uar_base, MLX5_UAR_SIZE);
> -     sd->uar_base = NULL;
> +     assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> +     assert(idx < MLX5_UAR_TABLE_SIZE_MAX);
> +     rte_spinlock_lock(&sd->lock);
> +     /* Mark the slot is empty. */
> +     rte_bitmap_set(sd->uar_bmp, idx);
> +     rte_spinlock_unlock(&sd->lock);
> +     DRV_LOG(DEBUG, "index %d is freed in UAR reg. table", idx);
>  }
> 
>  /**
> - * Reserve UAR address space for secondary process, align with primary
> process.
> + * Initialize UAR register table bitmap.
> + *
> + * UAR registers will be stored in a process-local table and the table
> + is
> + * managed by a global bitmap. When there's a need to store a UAR
> + register, an
> + * index should be allocated by mlx5_uar_alloc_index() and address of
> + the
> + * allocated table entry must be acquired by mlx5_uar_get_addr_ptr() so
> + that the
> + * table can be expanded if overflowed.
> + *
> + * The local UAR register table doesn't cover all the indexes in the bitmap.
> + * This will be expanded if more indexes are allocated than the current
> + size of
> + * the table.
> + *
> + * Secondary process should have reference of the index and store
> + remapped
> + * register at the same index in its local UAR register table.
> + *
> + * On the datapath of each process, the register can be referenced
> + simply by
> + * MLX5_UAR_REG(idx).
>   *
>   * @return
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_uar_init_secondary(void)
> +uar_init_primary(void)
>  {
>       struct mlx5_shared_data *sd = mlx5_shared_data;
> -     struct mlx5_local_data *ld = &mlx5_local_data;
> -     void *addr;
> +     struct rte_bitmap *bmp;
> +     void *bmp_mem;
> +     uint32_t bmp_size;
> +     unsigned int i;
> 
> -     if (ld->uar_base) { /* Already reserved. */
> -             assert(sd->uar_base == ld->uar_base);
> -             return 0;
> -     }
> -     assert(sd->uar_base);
> -     /* anonymous mmap, no real memory consumption. */
> -     addr = mmap(sd->uar_base, MLX5_UAR_SIZE,
> -                 PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> -     if (addr == MAP_FAILED) {
> -             DRV_LOG(ERR, "UAR mmap failed: %p size: %llu",
> -                     sd->uar_base, MLX5_UAR_SIZE);
> -             rte_errno = ENXIO;
> +     bmp_size =
> rte_bitmap_get_memory_footprint(MLX5_UAR_TABLE_SIZE_MAX);
> +     bmp_mem = rte_zmalloc("uar_table", bmp_size,
> RTE_CACHE_LINE_SIZE);
> +     if (!bmp_mem) {
> +             rte_errno = ENOMEM;
> +             DRV_LOG(ERR, "failed to allocate memory for uar table");
>               return -rte_errno;
>       }
> -     if (sd->uar_base != addr) {
> -             DRV_LOG(ERR,
> -                     "UAR address %p size %llu occupied, please"
> -                     " adjust MLX5_UAR_OFFSET or try EAL parameter"
> -                     " --base-virtaddr",
> -                     sd->uar_base, MLX5_UAR_SIZE);
> -             rte_errno = ENXIO;
> -             return -rte_errno;
> +     bmp = rte_bitmap_init(MLX5_UAR_TABLE_SIZE_MAX, bmp_mem,
> bmp_size);
> +     /* Set the entire bitmap as 1 means vacant and 0 means empty. */
> +     for (i = 0; i < bmp->array2_size; ++i)
> +             rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -
> 1);
> +     sd->uar_bmp = bmp;
> +     return 0;
> +}
> +
> +/**
> + * Un-initialize UAR register resources.
> + *
> + * The global bitmap and the register table of primary process are freed.
> + */
> +static void
> +uar_uninit_primary(void)
> +{
> +     struct mlx5_shared_data *sd = mlx5_shared_data;
> +     struct mlx5_local_data *ld = &mlx5_local_data;
> +
> +     if (sd->uar_bmp) {
> +             rte_bitmap_free(sd->uar_bmp);
> +             rte_free(sd->uar_bmp);
> +             sd->uar_bmp = NULL;
> +     }
> +     /* Free primary's table. */
> +     if (ld->uar_table) {
> +             rte_free(ld->uar_table);
> +             ld->uar_table = NULL;
> +             ld->uar_table_sz = 0;
>       }
> -     ld->uar_base = addr;
> -     DRV_LOG(INFO, "Reserved UAR address space: %p", addr);
> +}
> +
> +/**
> + * Initialize UAR register resources for secondary process.
> + *
> + * Allocate the local UAR register table. Initially, the number of
> +entries is
> + * same as the size of a bitmap slab.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +uar_init_secondary(void)
> +{
> +     /* Prepare at least a bitmap slab. */
> +     uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE);
>       return 0;
>  }
> 
>  /**
> - * Unmap UAR address space reserved for secondary process.
> + * Un-initialize UAR register resources for secondary process.
> + *
> + * The local UAR register table is freed.
>   */
>  static void
> -mlx5_uar_uninit_secondary(void)
> +uar_uninit_secondary(void)
>  {
>       struct mlx5_local_data *ld = &mlx5_local_data;
> 
> -     if (!ld->uar_base)
> -             return;
> -     munmap(ld->uar_base, MLX5_UAR_SIZE);
> -     ld->uar_base = NULL;
> +     /* Free process-local table. */
> +     if (ld->uar_table) {
> +             rte_free(ld->uar_table);
> +             ld->uar_table = NULL;
> +             ld->uar_table_sz = 0;
> +     }
>  }
> 
>  /**
> @@ -967,7 +1063,7 @@ mlx5_init_once(void)
> 
>       rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
>                                               mlx5_mr_mem_event_cb,
> NULL);
>               mlx5_mp_init_primary();
> -             ret = mlx5_uar_init_primary();
> +             ret = uar_init_primary();
>               if (ret)
>                       goto error;
>               sd->init_done = true;
> @@ -976,7 +1072,7 @@ mlx5_init_once(void)
>               if (ld->init_done)
>                       break;
>               mlx5_mp_init_secondary();
> -             ret = mlx5_uar_init_secondary();
> +             ret = uar_init_secondary();
>               if (ret)
>                       goto error;
>               ++sd->secondary_cnt;
> @@ -990,12 +1086,12 @@ mlx5_init_once(void)
>  error:
>       switch (rte_eal_process_type()) {
>       case RTE_PROC_PRIMARY:
> -             mlx5_uar_uninit_primary();
> +             uar_uninit_primary();
>               mlx5_mp_uninit_primary();
> 
>       rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
> NULL);
>               break;
>       case RTE_PROC_SECONDARY:
> -             mlx5_uar_uninit_secondary();
> +             uar_uninit_secondary();
>               mlx5_mp_uninit_secondary();
>               break;
>       default:
> @@ -1099,7 +1195,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>               if (err < 0)
>                       return NULL;
>               /* Remap UAR for Tx queues. */
> -             err = mlx5_tx_uar_remap(eth_dev, err);
> +             err = mlx5_txq_uar_init(eth_dev, err);
>               if (err)
>                       return NULL;
>               /*
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 8ce8361a85..f77517bee0 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -97,8 +97,8 @@ struct mlx5_shared_data {
>       /* Global spinlock for primary and secondary processes. */
>       int init_done; /* Whether primary has done initialization. */
>       unsigned int secondary_cnt; /* Number of secondary processes
> init'd. */
> -     void *uar_base;
> -     /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping.
> */
> +     struct rte_bitmap *uar_bmp;
> +     /* Bitmap to keep track of BlueFlame register table. */
>       struct mlx5_dev_list mem_event_cb_list;
>       rte_rwlock_t mem_event_rwlock;
>  };
> @@ -106,11 +106,19 @@ struct mlx5_shared_data {
>  /* Per-process data structure, not visible to other processes. */  struct
> mlx5_local_data {
>       int init_done; /* Whether a secondary has done initialization. */
> -     void *uar_base;
> -     /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping.
> */
> +     void *(*uar_table)[];
> +     /* Table of BlueFlame registers for each process. */
> +     size_t uar_table_sz;
> +     /* Size of BlueFlame register table. */
>  };
> 
>  extern struct mlx5_shared_data *mlx5_shared_data;
> +extern struct mlx5_local_data mlx5_local_data;
> +
> +/* The maximum size of BlueFlame register table. */ #define
> +MLX5_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS *
> RTE_MAX_QUEUES_PER_PORT)
> +
> +#define MLX5_UAR_REG(idx) ((*mlx5_local_data.uar_table)[(idx)])

Same concern - what if the uar table is expanded due to other device in 
configuration stage?

> 
>  struct mlx5_counter_ctrl {
>       /* Name of the counter. */
> @@ -331,6 +339,9 @@ struct mlx5_priv {
>  /* mlx5.c */
> 
>  int mlx5_getenv_int(const char *);
> +void **mlx5_uar_get_addr_ptr(uint32_t idx); uint32_t
> +mlx5_uar_alloc_index(void); void mlx5_uar_free_index(uint32_t idx);
> 
>  /* mlx5_ethdev.c */
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index ced9945888..b32c1d6e0f 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -203,7 +203,7 @@ struct mlx5_txq_data {
>       volatile void *wqes; /* Work queue (use volatile to write into). */
>       volatile uint32_t *qp_db; /* Work queue doorbell. */
>       volatile uint32_t *cq_db; /* Completion queue doorbell. */
> -     volatile void *bf_reg; /* Blueflame register remapped. */
> +     uint32_t bfreg_idx; /* Blueflame register index. */
>       struct rte_mbuf *(*elts)[]; /* TX elements. */
>       struct mlx5_txq_stats stats; /* TX queue counters. */  #ifndef
> RTE_ARCH_64 @@ -232,7 +232,7 @@ struct mlx5_txq_ctrl {
>       struct mlx5_priv *priv; /* Back pointer to private data. */
>       struct mlx5_txq_data txq; /* Data path structure. */
>       off_t uar_mmap_offset; /* UAR mmap offset for non-primary
> process. */
> -     volatile void *bf_reg_orig; /* Blueflame register from verbs. */
> +     void *bf_reg; /* BlueFlame register from Verbs. */
>       uint16_t idx; /* Queue index. */
>  };
> 
> @@ -303,7 +303,7 @@ uint64_t mlx5_get_rx_queue_offloads(struct
> rte_eth_dev *dev);  int mlx5_tx_queue_setup(struct rte_eth_dev *dev,
> uint16_t idx, uint16_t desc,
>                       unsigned int socket, const struct rte_eth_txconf
> *conf);  void mlx5_tx_queue_release(void *dpdk_txq); -int
> mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
> +int mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd);
>  struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t
> idx);  struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev,
> uint16_t idx);  int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv); @@
> -706,7 +706,7 @@ static __rte_always_inline void
> mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct
> mlx5_wqe *wqe,
>                      int cond)
>  {
> -     uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
> +     uint64_t *dst = MLX5_UAR_REG(txq->bfreg_idx);
>       volatile uint64_t *src = ((volatile uint64_t *)wqe);
> 
>       rte_cio_wmb();
> diff --git a/drivers/net/mlx5/mlx5_trigger.c
> b/drivers/net/mlx5/mlx5_trigger.c index 5b73f0ff03..d7f27702e8 100644
> --- a/drivers/net/mlx5/mlx5_trigger.c
> +++ b/drivers/net/mlx5/mlx5_trigger.c
> @@ -58,7 +58,7 @@ mlx5_txq_start(struct rte_eth_dev *dev)
>                       goto error;
>               }
>       }
> -     ret = mlx5_tx_uar_remap(dev, priv->sh->ctx->cmd_fd);
> +     ret = mlx5_txq_uar_init(dev, priv->sh->ctx->cmd_fd);
>       if (ret) {
>               /* Adjust index for rollback. */
>               i = priv->txqs_n - 1;
> diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
> index 1b3d89f2f6..d8e0bda371 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -231,9 +231,13 @@ mlx5_tx_queue_release(void *dpdk_txq)
> 
> 
>  /**
> - * Mmap TX UAR(HW doorbell) pages into reserved UAR address space.
> - * Both primary and secondary process do mmap to make UAR address
> - * aligned.
> + * Initialize UAR register access for Tx.
> + *
> + * For both primary and secondary, initialize UAR locks for atomic access.
> + *
> + * For secondary, remap BlueFlame registers for secondary process.
> + Remapped
> + * address is stored at the same indexed entry of the local UAR
> + register table
> + * as primary process.
>   *
>   * @param[in] dev
>   *   Pointer to Ethernet device.
> @@ -244,75 +248,52 @@ mlx5_tx_queue_release(void *dpdk_txq)
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  int
> -mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd)
> +mlx5_txq_uar_init(struct rte_eth_dev *dev, int fd)
>  {
>       struct mlx5_priv *priv = dev->data->dev_private;
> -     unsigned int i, j;
> -     uintptr_t pages[priv->txqs_n];
> -     unsigned int pages_n = 0;
> -     uintptr_t uar_va;
> -     uintptr_t off;
> -     void *addr;
> -     void *ret;
>       struct mlx5_txq_data *txq;
>       struct mlx5_txq_ctrl *txq_ctrl;
> -     int already_mapped;
> +     void *addr;
> +     void **addr_ptr;
> +     uintptr_t uar_va;
> +     uintptr_t offset;
>       size_t page_size = sysconf(_SC_PAGESIZE);
> +     unsigned int i;
>  #ifndef RTE_ARCH_64
>       unsigned int lock_idx;
>  #endif
> 
> -     memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
> -     /*
> -      * As rdma-core, UARs are mapped in size of OS page size.
> -      * Use aligned address to avoid duplicate mmap.
> -      * Ref to libmlx5 function: mlx5_init_context()
> -      */
>       for (i = 0; i != priv->txqs_n; ++i) {
>               if (!(*priv->txqs)[i])
>                       continue;
>               txq = (*priv->txqs)[i];
>               txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
>               assert(txq_ctrl->idx == (uint16_t)i);
> -             /* UAR addr form verbs used to find dup and offset in page.
> */
> -             uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
> -             off = uar_va & (page_size - 1); /* offset in page. */
> -             uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page
> addr. */
> -             already_mapped = 0;
> -             for (j = 0; j != pages_n; ++j) {
> -                     if (pages[j] == uar_va) {
> -                             already_mapped = 1;
> -                             break;
> -                     }
> -             }
> -             /* new address in reserved UAR address space. */
> -             addr = RTE_PTR_ADD(mlx5_shared_data->uar_base,
> -                                uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
> -             if (!already_mapped) {
> -                     pages[pages_n++] = uar_va;
> -                     /* fixed mmap to specified address in reserved
> -                      * address space.
> +             if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> +                     /*
> +                      * As rdma-core, UARs are mapped in size of OS page
> +                      * size. Ref to libmlx5 function: mlx5_init_context()
>                        */
> -                     ret = mmap(addr, page_size,
> -                                PROT_WRITE, MAP_FIXED | MAP_SHARED,
> fd,
> -                                txq_ctrl->uar_mmap_offset);
> -                     if (ret != addr) {
> -                             /* fixed mmap have to return same address
> */
> +                     uar_va = (uintptr_t)txq_ctrl->bf_reg;
> +                     offset = uar_va & (page_size - 1); /* Offset in page.
> */
> +                     addr = mmap(NULL, page_size, PROT_WRITE,
> MAP_SHARED, fd,
> +                                 txq_ctrl->uar_mmap_offset);
> +                     if (addr == MAP_FAILED) {
>                               DRV_LOG(ERR,
> -                                     "port %u call to mmap failed on UAR"
> -                                     " for txq %u",
> +                                     "port %u mmap failed for BF reg."
> +                                     " of txq %u",
>                                       dev->data->port_id, txq_ctrl->idx);
>                               rte_errno = ENXIO;
>                               return -rte_errno;
>                       }
> +                     addr = RTE_PTR_ADD(addr, offset);
> +                     addr_ptr = mlx5_uar_get_addr_ptr(txq->bfreg_idx);
> +                     if (!addr_ptr)
> +                             return -rte_errno;
> +                     *addr_ptr = addr;
>               }
> -             if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save
> once */
> -                     txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr,
> off);
> -             else
> -                     assert(txq_ctrl->txq.bf_reg ==
> -                            RTE_PTR_ADD((void *)addr, off));
>  #ifndef RTE_ARCH_64
> -             /* Assign a UAR lock according to UAR page number */
> +             /* Assign an UAR lock according to UAR page number */
>               lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
>                          MLX5_UAR_PAGE_NUM_MASK;
>               txq->uar_lock = &priv->uar_lock[lock_idx]; @@ -372,6 +353,7
> @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
>       struct mlx5dv_obj obj;
>       const int desc = 1 << txq_data->elts_n;
>       eth_tx_burst_t tx_pkt_burst = mlx5_select_tx_function(dev);
> +     void **addr_ptr;
>       int ret = 0;
> 
>       assert(txq_data);
> @@ -507,7 +489,17 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev,
> uint16_t idx)
>       txq_data->wqes = qp.sq.buf;
>       txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
>       txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
> -     txq_ctrl->bf_reg_orig = qp.bf.reg;
> +     /* Allocate a new index in UAR table. */
> +     ret = mlx5_uar_alloc_index();
> +     if (ret < 0)
> +             goto error;
> +     txq_data->bfreg_idx = ret;
> +     txq_ctrl->bf_reg = qp.bf.reg;
> +     /* Store the BlueFlame register address in the local table. */
> +     addr_ptr = mlx5_uar_get_addr_ptr(txq_data->bfreg_idx);
> +     if (!addr_ptr)
> +             goto error;
> +     *addr_ptr = txq_ctrl->bf_reg;
>       txq_data->cq_db = cq_info.dbrec;
>       txq_data->cqes =
>               (volatile struct mlx5_cqe (*)[])
> @@ -589,6 +581,7 @@ mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv)
> {
>       assert(txq_ibv);
>       if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
> +             mlx5_uar_free_index(txq_ibv->txq_ctrl->txq.bfreg_idx);
>               claim_zero(mlx5_glue->destroy_qp(txq_ibv->qp));
>               claim_zero(mlx5_glue->destroy_cq(txq_ibv->cq));
>               LIST_REMOVE(txq_ibv, next);
> @@ -837,15 +830,12 @@ mlx5_txq_release(struct rte_eth_dev *dev,
> uint16_t idx)  {
>       struct mlx5_priv *priv = dev->data->dev_private;
>       struct mlx5_txq_ctrl *txq;
> -     size_t page_size = sysconf(_SC_PAGESIZE);
> 
>       if (!(*priv->txqs)[idx])
>               return 0;
>       txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
>       if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
>               txq->ibv = NULL;
> -     munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
> page_size),
> -            page_size);
>       if (rte_atomic32_dec_and_test(&txq->refcnt)) {
>               txq_free_elts(txq);
>               mlx5_mr_btree_free(&txq->txq.mr_ctrl.cache_bh);
> --
> 2.11.0

Reply via email to