Thursday, March 21, 2019 10:11 AM, Viacheslav Ovsiienko:
> Subject: [dpdk-dev] [PATCH 06/14] net/mlx5: add IB shared context
> alloc/free functions
>
> The functions to allocate and free shared IB context for multiport is added.
> The IB device context, Protection Domain, device attributes, Infiniband
> names are going to be relocated to the shared structure from the device
> private one. mlx5_dev_spawn() is updated to create shared context.
>
> Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com>
Patch #2 in the series should be part of this commit, since it doesn't make sense as a standalone change.

> ---
> drivers/net/mlx5/mlx5.c | 234
> ++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 176 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> 100e9f4..b3060de 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -140,6 +140,150 @@ struct mlx5_dev_spawn_data {
> struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ };
>
> +static LIST_HEAD(, mlx5_ibv_shared) mlx5_ibv_list =
> +LIST_HEAD_INITIALIZER();
> +
> +/**
> + * Allocate shared IB device context. If there is multiport device the
> + * master and representors will share this context, if there is single
> + * port dedicated IB device, the context will be used by only given
> + * port due to unification.
> + *
> + * Routine first searches the context for the spesified IB device name,
> + * if found the shared context assumed and reference counter is
> incremented.
> + * If no context found the new one is created and initialized with
> +specified
> + * IB device context and parameters.
> + *
> + * @param[in] spawn
> + * Pointer to the IB device attributes (name, port, etc).
> + *
> + * @return
> + * Pointer to mlx5_ibv_shared object on success,
> + * otherwise NULL and rte_errno is set.
> + */
> +static struct mlx5_ibv_shared *
> +mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn) {
> + struct mlx5_ibv_shared *sh;
> + int err = 0;

Since you have a global list, you need to make access to it thread-safe.
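Something along these lines would do it. This is a minimal sketch only, with stand-in type and helper names (ibv_shared, ibv_shared_get, ibv_list_mutex), not the actual driver code: the point is that the lookup, the refcount increment and the list insertion have to form a single critical section, otherwise two ports probed in parallel can both miss the lookup and allocate two contexts for the same IB device.

#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

/* Stand-in for struct mlx5_ibv_shared, reduced to what the sketch needs. */
struct ibv_shared {
	LIST_ENTRY(ibv_shared) next;
	char ibdev_name[64];
	unsigned int refcnt;
};

static LIST_HEAD(, ibv_shared) ibv_list = LIST_HEAD_INITIALIZER(ibv_list);
static pthread_mutex_t ibv_list_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Lookup-or-create under one lock so concurrent probes cannot race. */
static struct ibv_shared *
ibv_shared_get(const char *name)
{
	struct ibv_shared *sh;

	pthread_mutex_lock(&ibv_list_mutex);
	LIST_FOREACH(sh, &ibv_list, next)
		if (!strcmp(sh->ibdev_name, name)) {
			sh->refcnt++;	/* existing context, just take a reference */
			goto exit;
		}
	sh = calloc(1, sizeof(*sh));	/* rte_zmalloc() in the driver */
	if (sh != NULL) {
		strncpy(sh->ibdev_name, name, sizeof(sh->ibdev_name) - 1);
		sh->refcnt = 1;
		LIST_INSERT_HEAD(&ibv_list, sh, next);
	}
exit:
	pthread_mutex_unlock(&ibv_list_mutex);
	return sh;
}

The same mutex then has to cover the release path as well, see the note at the end of this mail.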
> +
> + assert(spawn);
> + /* Search for IB context by device name. */
> + LIST_FOREACH(sh, &mlx5_ibv_list, next) {
> + if (!strcmp(sh->ibdev_name, spawn->ibv_dev->name)) {
> + assert(!sh->secondary);

How do you enforce that the secondary process does not call this function?

> + sh->refcnt++;
> + return sh;
> + }
> + }
> + /* No device found, we have to create new sharted context. */
> + assert(spawn->max_port);
> + sh = rte_zmalloc("ethdev shared ib context",
> + sizeof(struct mlx5_ibv_shared) +
> + spawn->max_port *
> + sizeof(struct mlx5_ibv_shared_port),
> + RTE_CACHE_LINE_SIZE);
> + if (!sh) {
> + DRV_LOG(ERR, "shared context allocation failure");
> + rte_errno = ENOMEM;
> + return NULL;
> + }
> + /* Try to open IB device with DV first, then usual Verbs. */
> + errno = 0;
> + sh->ctx = mlx5_glue->dv_open_device(spawn->ibv_dev);

This one is only for primary processes, right?

> + if (sh->ctx) {
> + sh->devx = 1;
> + DRV_LOG(DEBUG, "DevX is supported");
> + } else {
> + sh->ctx = mlx5_glue->open_device(spawn->ibv_dev);
> + if (!sh->ctx) {
> + err = errno ? errno : ENODEV;
> + goto error;
> + }
> + DRV_LOG(DEBUG, "DevX is NOT supported");
> + }
> + err = mlx5_glue->query_device_ex(sh->ctx, NULL, &sh-
> >device_attr);
> + if (err) {
> + DRV_LOG(DEBUG, "ibv_query_device_ex() failed");
> + goto error;
> + }
> + sh->refcnt = 1;
> + sh->max_port = spawn->max_port;
> + strncpy(sh->ibdev_name, sh->ctx->device->name,
> + sizeof(sh->ibdev_name));
> + strncpy(sh->ibdev_path, sh->ctx->device->ibdev_path,
> + sizeof(sh->ibdev_path));
> + if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> + /*
> + * For secondary process we just open the IB device
> + * and get attributes, there will no be real usage
> + * of this structure, the secondary process will
> + * use one from prpimary.
> + */
> + sh->secondary = 1;

The secondary process should not open a new device; it should use the primary device + private structure for everything. In fact, the secondary process should not call this function nor reference any shared object.
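To make that explicit in the code, the spawn path could refuse to build a shared context outside the primary process and rely on rte_eth_dev_attach_secondary() plus the primary's private data instead. A rough sketch of such a guard (the helper name and the errno value are illustrative only, not a concrete patch):

#include <errno.h>
#include <rte_eal.h>
#include <rte_errno.h>

/*
 * Illustrative guard only: shared IB contexts (device handle, PD, device
 * attributes) are owned by the primary process. A secondary process must
 * reuse what the primary already set up through the shared device data
 * instead of opening the IB device again.
 */
static int
shared_ctx_check_primary(void)
{
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

With such a check the allocator bails out before it ever touches mlx5_ibv_list, and the whole sh->secondary special case can go away.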
> + return sh;
> + }
> + sh->pd = mlx5_glue->alloc_pd(sh->ctx);
> + if (sh->pd == NULL) {
> + DRV_LOG(ERR, "PD allocation failure");
> + err = ENOMEM;
> + goto error;
> + }
> + LIST_INSERT_HEAD(&mlx5_ibv_list, sh, next);
> + return sh;
> +error:
> + assert(sh);
> + if (sh->pd)
> + claim_zero(mlx5_glue->dealloc_pd(sh->pd));
> + if (sh->ctx)
> + claim_zero(mlx5_glue->close_device(sh->ctx));
> + rte_free(sh);
> + assert(err > 0);
> + rte_errno = err;
> + return NULL;
> +}
> +
> +/**
> + * Free shared IB device context. Decrement counter and if zero free
> + * all allocated resources and close handles.
> + *
> + * @param[in] sh
> + * Pointer to mlx5_ibv_shared object to free
> + */
> +static void
> +mlx5_free_shared_ibctx(struct mlx5_ibv_shared *sh) { #ifndef NDEBUG
> + /* Check the object presence in the list. */
> + struct mlx5_ibv_shared *lctx;
> +
> + LIST_FOREACH(lctx, &mlx5_ibv_list, next)
> + if (lctx == sh)
> + break;
> + assert(lctx);
> + if (lctx != sh) {
> + DRV_LOG(ERR, "Freeing non-existing shared IB context");
> + return;
> + }
> +#endif
> + assert(sh);
> + assert(sh->refcnt);
> + if (--sh->refcnt)
> + return;
> + /* Zero reference counter, we should release resources. */
> + if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> + assert(sh->secondary);
> + assert(sh->ctx);
> + assert(!sh->pd);
> + }
> + LIST_REMOVE(sh, next);

The secondary process is not allowed to do any of the below.

> + if (sh->pd)
> + claim_zero(mlx5_glue->dealloc_pd(sh->pd));
> + if (sh->ctx)
> + claim_zero(mlx5_glue->close_device(sh->ctx));
> + rte_free(sh);
> +}
> +
> +
> /**
> * Prepare shared data between primary and secondary process.
> */
> @@ -289,12 +433,10 @@ struct mlx5_dev_spawn_data {
> }
> mlx5_mprq_free_mp(dev);
> mlx5_mr_release(dev);
> - if (priv->pd != NULL) {
> - assert(priv->ctx != NULL);
> - claim_zero(mlx5_glue->dealloc_pd(priv->pd));
> - claim_zero(mlx5_glue->close_device(priv->ctx));
> - } else
> - assert(priv->ctx == NULL);
> + assert(priv->sh);
> + if (priv->sh)
> + mlx5_free_shared_ibctx(priv->sh);
> + priv->sh = NULL;
> if (priv->rss_conf.rss_key != NULL)
> rte_free(priv->rss_conf.rss_key);
> if (priv->reta_idx != NULL)
> @@ -744,11 +886,8 @@ struct mlx5_dev_spawn_data {
> struct mlx5_dev_config config)
> {
> const struct mlx5_switch_info *switch_info = &spawn->info;
> - struct ibv_device *ibv_dev = spawn->ibv_dev;
> - struct ibv_context *ctx = NULL;
> - struct ibv_device_attr_ex attr;
> + struct mlx5_ibv_shared *sh;
> struct ibv_port_attr port_attr;
> - struct ibv_pd *pd = NULL;
> struct mlx5dv_context dv_attr = { .comp_mask = 0 };
> struct rte_eth_dev *eth_dev = NULL;
> struct mlx5_priv *priv = NULL;
> @@ -807,18 +946,10 @@ struct mlx5_dev_spawn_data {
> }
> /* Prepare shared data between primary and secondary process. */
> mlx5_prepare_shared_data();
> - errno = 0;
> - ctx = mlx5_glue->dv_open_device(ibv_dev);
> - if (ctx) {
> - config.devx = 1;
> - DRV_LOG(DEBUG, "DEVX is supported");
> - } else {
> - ctx = mlx5_glue->open_device(ibv_dev);
> - if (!ctx) {
> - rte_errno = errno ? errno : ENODEV;
> - return NULL;
> - }
> - }
> + sh = mlx5_alloc_shared_ibctx(spawn);
> + if (!sh)
> + return NULL;
> + config.devx = sh->devx;
> #ifdef HAVE_IBV_MLX5_MOD_SWP
> dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_SWP; #endif @@
> -832,7 +963,7 @@ struct mlx5_dev_spawn_data { #ifdef
> HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
> dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ;
> #endif
> - mlx5_glue->dv_query_device(ctx, &dv_attr);
> + mlx5_glue->dv_query_device(sh->ctx, &dv_attr);
> if (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
> if (dv_attr.flags &
> MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {
> DRV_LOG(DEBUG, "enhanced MPW is supported");
> @@ -917,11 +1048,6 @@ struct mlx5_dev_spawn_data {
> " old OFED/rdma-core version or firmware configuration");
> #endif
> config.mpls_en = mpls_en;
> - err = mlx5_glue->query_device_ex(ctx, NULL, &attr);
> - if (err) {
> - DEBUG("ibv_query_device_ex() failed");
> - goto error;
> - }
> DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
> if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> eth_dev = rte_eth_dev_attach_secondary(name);
> @@ -957,11 +1083,11 @@ struct mlx5_dev_spawn_data {
> */
> eth_dev->rx_pkt_burst =
> mlx5_select_rx_function(eth_dev);
> eth_dev->tx_pkt_burst =
> mlx5_select_tx_function(eth_dev);
> - claim_zero(mlx5_glue->close_device(ctx));
> + mlx5_free_shared_ibctx(sh);
> return eth_dev;
> }
> /* Check port status. */
> - err = mlx5_glue->query_port(ctx, spawn->ibv_port, &port_attr);
> + err = mlx5_glue->query_port(sh->ctx, spawn->ibv_port, &port_attr);
> if (err) {
> DRV_LOG(ERR, "port query failed: %s", strerror(err));
> goto error;
> @@ -975,13 +1101,7 @@ struct mlx5_dev_spawn_data {
> DRV_LOG(DEBUG, "port is not active: \"%s\" (%d)",
> mlx5_glue->port_state_str(port_attr.state),
> port_attr.state);
> - /* Allocate protection domain. */
> - pd = mlx5_glue->alloc_pd(ctx);
> - if (pd == NULL) {
> - DRV_LOG(ERR, "PD allocation failure");
> - err = ENOMEM;
> - goto error;
> - }
> + /* Allocate private eth device data. */
> priv = rte_zmalloc("ethdev private structure",
> sizeof(*priv),
> RTE_CACHE_LINE_SIZE);
> @@ -990,13 +1110,11 @@ struct mlx5_dev_spawn_data {
> err = ENOMEM;
> goto error;
> }
> - priv->ctx = ctx;
> - strncpy(priv->ibdev_name, priv->ctx->device->name,
> - sizeof(priv->ibdev_name));
> - strncpy(priv->ibdev_path, priv->ctx->device->ibdev_path,
> - sizeof(priv->ibdev_path));
> - priv->device_attr = attr;
> - priv->pd = pd;
> + priv->sh = sh;
> + priv->ctx = sh->ctx;
> + priv->ibv_port = spawn->ibv_port;
> + priv->device_attr = sh->device_attr;
> + priv->pd = sh->pd;
> priv->mtu = ETHER_MTU;
> #ifndef RTE_ARCH_64
> /* Initialize UAR access locks for 32bit implementations. */ @@ - 1051,7 +1169,8 @@ struct mlx5_dev_spawn_data {
> strerror(rte_errno));
> goto error;
> }
> - config.hw_csum = !!(attr.device_cap_flags_ex &
> IBV_DEVICE_RAW_IP_CSUM);
> + config.hw_csum = !!(sh->device_attr.device_cap_flags_ex &
> + IBV_DEVICE_RAW_IP_CSUM);
> DRV_LOG(DEBUG, "checksum offloading is %ssupported",
> (config.hw_csum ? "" : "not "));
> #if !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) && \ @@ -1065,7
> +1184,7 @@ struct mlx5_dev_spawn_data {
> }
> #endif
> config.ind_table_max_size =
> - attr.rss_caps.max_rwq_indirection_table_size;
> + sh->device_attr.rss_caps.max_rwq_indirection_table_size;
> /*
> * Remove this check once DPDK supports larger/variable
> * indirection tables.
> @@ -1074,18 +1193,18 @@ struct mlx5_dev_spawn_data {
> config.ind_table_max_size = ETH_RSS_RETA_SIZE_512;
> DRV_LOG(DEBUG, "maximum Rx indirection table size is %u",
> config.ind_table_max_size);
> - config.hw_vlan_strip = !!(attr.raw_packet_caps &
> + config.hw_vlan_strip = !!(sh->device_attr.raw_packet_caps &
> > IBV_RAW_PACKET_CAP_CVLAN_STRIPPING);
> DRV_LOG(DEBUG, "VLAN stripping is %ssupported",
> (config.hw_vlan_strip ? "" : "not "));
> - config.hw_fcs_strip = !!(attr.raw_packet_caps &
> + config.hw_fcs_strip = !!(sh->device_attr.raw_packet_caps &
> IBV_RAW_PACKET_CAP_SCATTER_FCS);
> DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported",
> (config.hw_fcs_strip ? "" : "not ")); #if
> defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
> - hw_padding = !!attr.rx_pad_end_addr_align;
> + hw_padding = !!sh->device_attr.rx_pad_end_addr_align;
> #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
> - hw_padding = !!(attr.device_cap_flags_ex &
> + hw_padding = !!(sh->device_attr.device_cap_flags_ex &
> IBV_DEVICE_PCI_WRITE_END_PADDING);
> #endif
> if (config.hw_padding && !hw_padding) { @@ -1094,11 +1213,11 @@
> struct mlx5_dev_spawn_data {
> } else if (config.hw_padding) {
> DRV_LOG(DEBUG, "Rx end alignment padding is enabled");
> }
> - config.tso = (attr.tso_caps.max_tso > 0 &&
> - (attr.tso_caps.supported_qpts &
> + config.tso = (sh->device_attr.tso_caps.max_tso > 0 &&
> + (sh->device_attr.tso_caps.supported_qpts &
> (1 << IBV_QPT_RAW_PACKET)));
> if (config.tso)
> - config.tso_max_payload_sz = attr.tso_caps.max_tso;
> + config.tso_max_payload_sz = sh-
> >device_attr.tso_caps.max_tso;
> /*
> * MPW is disabled by default, while the Enhanced MPW is enabled
> * by default.
> @@ -1257,7 +1376,8 @@ struct mlx5_dev_spawn_data {
> .free = &mlx5_free_verbs_buf,
> .data = priv,
> };
> - mlx5_glue->dv_set_context_attr(ctx,
> MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
> + mlx5_glue->dv_set_context_attr(sh->ctx,
> + MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
> (void *)((uintptr_t)&alctr));
> /* Bring Ethernet device up. */
> DRV_LOG(DEBUG, "port %u forcing Ethernet interface up", @@ - 1311,15 +1431,13 @@ struct mlx5_dev_spawn_data {
> if (eth_dev != NULL)
> eth_dev->data->dev_private = NULL;
> }
> - if (pd)
> - claim_zero(mlx5_glue->dealloc_pd(pd));
> if (eth_dev != NULL) {
> /* mac_addrs must not be freed alone because part of
> dev_private */
> eth_dev->data->mac_addrs = NULL;
> rte_eth_dev_release_port(eth_dev);
> }
> - if (ctx)
> - claim_zero(mlx5_glue->close_device(ctx));
> + if (sh)
> + mlx5_free_shared_ibctx(sh);
> assert(err > 0);
> rte_errno = err;
> return NULL;
> --
> 1.8.3.1
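One more note on the free path quoted above: the reference-counter decrement and the LIST_REMOVE() need the same serialization as the allocation side, otherwise a parallel port probe can look up a context that is already being torn down. Reusing the stand-in types and ibv_list_mutex from the sketch earlier in this mail (again, only an illustration, not the exact code I expect):

/*
 * Release side of the earlier sketch: decrement and unlink under the
 * lock, release the heavy resources only after dropping it.
 */
static void
ibv_shared_put(struct ibv_shared *sh)
{
	int last;

	pthread_mutex_lock(&ibv_list_mutex);
	last = (--sh->refcnt == 0);
	if (last)
		LIST_REMOVE(sh, next);
	pthread_mutex_unlock(&ibv_list_mutex);
	if (last) {
		/* dealloc_pd()/close_device() would go here in the driver. */
		free(sh);
	}
}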